xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e2ee2a47cae8248a2ea96b9d6f35ff3f37ecfc00)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
10 a slightly higher hash-table lookup cost; without it, it is not scalable (each
11 processor has an order-N integer array) but access is fast.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
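
/*
   Illustrative sketch (gcol/lcol are placeholder names; this simply mirrors the
   lookups done later in this file): once the colmap exists, a global column index
   gcol is translated to a local off-diagonal column index as

       PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;                        <- lcol < 0 now means gcol is not in the off-diagonal part
   #else
       lcol = aij->colmap[gcol] - 1;  <- direct array lookup: O(1) time, order-N memory per process
   #endif

   where the +1/-1 shift lets the value 0 stand for "column not present".
*/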
35 
36 
37 #define CHUNKSIZE   15
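/*
   The two macros below insert a single (row,col,value) into the local diagonal
   block (the _A_ variant) or the off-diagonal block (the _B_ variant) of an MPIAIJ
   matrix: a short binary search narrows the search window, a linear scan finds the
   column, and if it is absent (and new nonzeros are allowed) the row storage is
   reallocated via MatSeqXAIJReallocateAIJ() and the later entries in the row are
   shifted up to make room for the new entry.
*/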
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
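  /* v[] is assumed to be ordered by increasing global column: first the entries to the
     left of the diagonal block (stored in B), then the diagonal block (A), then the rest of B */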
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
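      /* row im[i] is owned by another process: stash the values locally; they are
         sent to their owners during MatAssemblyBegin()/MatAssemblyEnd() */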
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatGetValues_MPIAIJ"
241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
242 {
243   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
244   PetscErrorCode ierr;
245   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
246   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
247 
248   PetscFunctionBegin;
249   for (i=0; i<m; i++) {
250     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
251     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
252     if (idxm[i] >= rstart && idxm[i] < rend) {
253       row = idxm[i] - rstart;
254       for (j=0; j<n; j++) {
255         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
256         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
257         if (idxn[j] >= cstart && idxn[j] < cend){
258           col = idxn[j] - cstart;
259           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
260         } else {
261           if (!aij->colmap) {
262             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
263           }
264 #if defined (PETSC_USE_CTABLE)
265           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
266           col --;
267 #else
268           col = aij->colmap[idxn[j]] - 1;
269 #endif
270           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
271           else {
272             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
273           }
274         }
275       }
276     } else {
277       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
278     }
279   }
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
286 {
287   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       nstash,reallocs;
290   InsertMode     addv;
291 
292   PetscFunctionBegin;
293   if (aij->donotstash) {
294     PetscFunctionReturn(0);
295   }
296 
297   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
298   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
299   if (addv == (ADD_VALUES|INSERT_VALUES)) {
300     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
301   }
302   mat->insertmode = addv; /* in case this processor had no cache */
303 
304   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
305   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
306   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
307   PetscFunctionReturn(0);
308 }
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
313 {
314   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
315   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
316   PetscErrorCode ierr;
317   PetscMPIInt    n;
318   PetscInt       i,j,rstart,ncols,flg;
319   PetscInt       *row,*col,other_disassembled;
320   PetscScalar    *val;
321   InsertMode     addv = mat->insertmode;
322 
323   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
324   PetscFunctionBegin;
325   if (!aij->donotstash) {
326     while (1) {
327       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
328       if (!flg) break;
329 
330       for (i=0; i<n;) {
331         /* Now identify the consecutive vals belonging to the same row */
332         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
333         if (j < n) ncols = j-i;
334         else       ncols = n-i;
335         /* Now assemble all these values with a single function call */
336         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
337         i = j;
338       }
339     }
340     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
341   }
342   a->compressedrow.use     = PETSC_FALSE;
343   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
344   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
345 
346   /* determine if any processor has disassembled; if so, we must
347      also disassemble ourselves, in order that we may reassemble. */
348   /*
349      if the nonzero structure of submatrix B cannot change then we know that
350      no processor disassembled, thus we can skip this step
351   */
352   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
353     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
354     if (mat->was_assembled && !other_disassembled) {
355       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
356     }
357   }
358   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
359     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
360   }
361   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
362   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
363   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
364   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
365 
366   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
367   aij->rowvalues = 0;
368 
369   /* used by MatAXPY() */
370   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
371   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
372 
373   PetscFunctionReturn(0);
374 }
375 
376 #undef __FUNCT__
377 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
378 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
379 {
380   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
381   PetscErrorCode ierr;
382 
383   PetscFunctionBegin;
384   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
385   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
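/*
   MatZeroRows_MPIAIJ: rows[] may name rows owned by other processes, so the code
   below first routes each requested global row to its owning process with
   point-to-point messages (receives are posted before the sends), and only then
   zeros the locally owned rows, optionally putting diag on the diagonal.
*/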
389 #undef __FUNCT__
390 #define __FUNCT__ "MatZeroRows_MPIAIJ"
391 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
392 {
393   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
394   PetscErrorCode ierr;
395   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
396   PetscInt       i,*owners = A->rmap.range;
397   PetscInt       *nprocs,j,idx,nsends,row;
398   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
399   PetscInt       *rvalues,count,base,slen,*source;
400   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
401   MPI_Comm       comm = A->comm;
402   MPI_Request    *send_waits,*recv_waits;
403   MPI_Status     recv_status,*send_status;
404 #if defined(PETSC_DEBUG)
405   PetscTruth     found = PETSC_FALSE;
406 #endif
407 
408   PetscFunctionBegin;
409   /*  first count number of contributors to each processor */
410   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
411   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
412   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
413   j = 0;
414   for (i=0; i<N; i++) {
415     if (lastidx > (idx = rows[i])) j = 0;
416     lastidx = idx;
417     for (; j<size; j++) {
418       if (idx >= owners[j] && idx < owners[j+1]) {
419         nprocs[2*j]++;
420         nprocs[2*j+1] = 1;
421         owner[i] = j;
422 #if defined(PETSC_DEBUG)
423         found = PETSC_TRUE;
424 #endif
425         break;
426       }
427     }
428 #if defined(PETSC_DEBUG)
429     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
430     found = PETSC_FALSE;
431 #endif
432   }
433   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
434 
435   /* inform other processors of number of messages and max length*/
436   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
437 
438   /* post receives:   */
439   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
440   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
441   for (i=0; i<nrecvs; i++) {
442     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
443   }
444 
445   /* do sends:
446       1) starts[i] gives the starting index in svalues for the rows going to
447          the ith processor
448   */
449   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
450   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
451   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
452   starts[0] = 0;
453   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
454   for (i=0; i<N; i++) {
455     svalues[starts[owner[i]]++] = rows[i];
456   }
457 
458   starts[0] = 0;
459   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
460   count = 0;
461   for (i=0; i<size; i++) {
462     if (nprocs[2*i+1]) {
463       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
464     }
465   }
466   ierr = PetscFree(starts);CHKERRQ(ierr);
467 
468   base = owners[rank];
469 
470   /*  wait on receives */
471   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
472   source = lens + nrecvs;
473   count  = nrecvs; slen = 0;
474   while (count) {
475     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
476     /* unpack receives into our local space */
477     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
478     source[imdex]  = recv_status.MPI_SOURCE;
479     lens[imdex]    = n;
480     slen          += n;
481     count--;
482   }
483   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
484 
485   /* move the data into the send scatter */
486   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
487   count = 0;
488   for (i=0; i<nrecvs; i++) {
489     values = rvalues + i*nmax;
490     for (j=0; j<lens[i]; j++) {
491       lrows[count++] = values[j] - base;
492     }
493   }
494   ierr = PetscFree(rvalues);CHKERRQ(ierr);
495   ierr = PetscFree(lens);CHKERRQ(ierr);
496   ierr = PetscFree(owner);CHKERRQ(ierr);
497   ierr = PetscFree(nprocs);CHKERRQ(ierr);
498 
499   /* actually zap the local rows */
500   /*
501         Zero the required rows. If the "diagonal block" of the matrix
502      is square and the user wishes to set the diagonal we use separate
503      code so that MatSetValues() is not called for each diagonal entry; that
504      would allocate new memory repeatedly, causing many mallocs and slowing things down.
505 
506        Contributed by: Matthew Knepley
507   */
508   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
509   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
510   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
511     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
512   } else if (diag != 0.0) {
513     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
514     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
515       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
516 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
517     }
518     for (i = 0; i < slen; i++) {
519       row  = lrows[i] + rstart;
520       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
521     }
522     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
524   } else {
525     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
526   }
527   ierr = PetscFree(lrows);CHKERRQ(ierr);
528 
529   /* wait on sends */
530   if (nsends) {
531     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
532     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
533     ierr = PetscFree(send_status);CHKERRQ(ierr);
534   }
535   ierr = PetscFree(send_waits);CHKERRQ(ierr);
536   ierr = PetscFree(svalues);CHKERRQ(ierr);
537 
538   PetscFunctionReturn(0);
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatMult_MPIAIJ"
543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
544 {
545   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
546   PetscErrorCode ierr;
547   PetscInt       nt;
548 
549   PetscFunctionBegin;
550   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
551   if (nt != A->cmap.n) {
552     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
553   }
554   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
555   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
556   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
557   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatMultAdd_MPIAIJ"
563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
564 {
565   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567 
568   PetscFunctionBegin;
569   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
570   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
571   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
572   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
579 {
580   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
581   PetscErrorCode ierr;
582   PetscTruth     merged;
583 
584   PetscFunctionBegin;
585   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
586   /* do nondiagonal part */
587   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
588   if (!merged) {
589     /* send it on its way */
590     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
591     /* do local part */
592     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
593     /* receive remote parts: note this assumes the values are not actually */
594     /* added into yy until the VecScatterEnd() on the next line */
595     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
596   } else {
597     /* do local part */
598     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
599     /* send it on its way */
600     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
601     /* values actually were received in the Begin() but we need to call this nop */
602     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 EXTERN_C_BEGIN
608 #undef __FUNCT__
609 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
611 {
612   MPI_Comm       comm;
613   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
614   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
615   IS             Me,Notme;
616   PetscErrorCode ierr;
617   PetscInt       M,N,first,last,*notme,i;
618   PetscMPIInt    size;
619 
620   PetscFunctionBegin;
621 
622   /* Easy test: symmetric diagonal block */
623   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
624   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
625   if (!*f) PetscFunctionReturn(0);
626   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
627   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
628   if (size == 1) PetscFunctionReturn(0);
629 
630   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
631   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
632   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
633   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
634   for (i=0; i<first; i++) notme[i] = i;
635   for (i=last; i<M; i++) notme[i-last+first] = i;
636   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
637   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
638   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
639   Aoff = Aoffs[0];
640   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
641   Boff = Boffs[0];
642   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
644   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
645   ierr = ISDestroy(Me);CHKERRQ(ierr);
646   ierr = ISDestroy(Notme);CHKERRQ(ierr);
647 
648   PetscFunctionReturn(0);
649 }
650 EXTERN_C_END
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
655 {
656   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
657   PetscErrorCode ierr;
658 
659   PetscFunctionBegin;
660   /* do nondiagonal part */
661   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
662   /* send it on its way */
663   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
664   /* do local part */
665   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
666   /* receive remote parts */
667   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
670 
671 /*
672   This only works correctly for square matrices where the subblock A->A is the
673    diagonal block
674 */
675 #undef __FUNCT__
676 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
678 {
679   PetscErrorCode ierr;
680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
681 
682   PetscFunctionBegin;
683   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
684   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
685     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
686   }
687   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatScale_MPIAIJ"
693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
694 {
695   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
696   PetscErrorCode ierr;
697 
698   PetscFunctionBegin;
699   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
700   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatDestroy_MPIAIJ"
706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
707 {
708   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
709   PetscErrorCode ierr;
710 
711   PetscFunctionBegin;
712 #if defined(PETSC_USE_LOG)
713   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
714 #endif
715   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
717   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
718 #if defined (PETSC_USE_CTABLE)
719   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
720 #else
721   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
722 #endif
723   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
724   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
725   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
726   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
727   ierr = PetscFree(aij);CHKERRQ(ierr);
728 
729   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
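/*
   MatView_MPIAIJ_Binary: process 0 writes the standard PETSc binary format, in order:
   a header [MAT_FILE_COOKIE, M, N, total nonzeros], the per-row nonzero counts, all
   global column indices, and finally all values; the other processes send their
   pieces to process 0 one at a time.
*/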
739 #undef __FUNCT__
740 #define __FUNCT__ "MatView_MPIAIJ_Binary"
741 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
742 {
743   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
744   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
745   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
746   PetscErrorCode    ierr;
747   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
748   int               fd;
749   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
750   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
751   PetscScalar       *column_values;
752 
753   PetscFunctionBegin;
754   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
755   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
756   nz   = A->nz + B->nz;
757   if (!rank) {
758     header[0] = MAT_FILE_COOKIE;
759     header[1] = mat->rmap.N;
760     header[2] = mat->cmap.N;
761     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
762     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
763     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
764     /* get largest number of rows any processor has */
765     rlen = mat->rmap.n;
766     range = mat->rmap.range;
767     for (i=1; i<size; i++) {
768       rlen = PetscMax(rlen,range[i+1] - range[i]);
769     }
770   } else {
771     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
772     rlen = mat->rmap.n;
773   }
774 
775   /* load up the local row counts */
776   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
777   for (i=0; i<mat->rmap.n; i++) {
778     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
779   }
780 
781   /* store the row lengths to the file */
782   if (!rank) {
783     MPI_Status status;
784     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
785     for (i=1; i<size; i++) {
786       rlen = range[i+1] - range[i];
787       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
788       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
789     }
790   } else {
791     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
792   }
793   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
794 
795   /* load up the local column indices */
796   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
797   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
798   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
799   cnt  = 0;
800   for (i=0; i<mat->rmap.n; i++) {
801     for (j=B->i[i]; j<B->i[i+1]; j++) {
802       if ( (col = garray[B->j[j]]) > cstart) break;
803       column_indices[cnt++] = col;
804     }
805     for (k=A->i[i]; k<A->i[i+1]; k++) {
806       column_indices[cnt++] = A->j[k] + cstart;
807     }
808     for (; j<B->i[i+1]; j++) {
809       column_indices[cnt++] = garray[B->j[j]];
810     }
811   }
812   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
813 
814   /* store the column indices to the file */
815   if (!rank) {
816     MPI_Status status;
817     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
818     for (i=1; i<size; i++) {
819       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
820       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
821       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
822       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
823     }
824   } else {
825     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
826     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827   }
828   ierr = PetscFree(column_indices);CHKERRQ(ierr);
829 
830   /* load up the local column values */
831   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
832   cnt  = 0;
833   for (i=0; i<mat->rmap.n; i++) {
834     for (j=B->i[i]; j<B->i[i+1]; j++) {
835       if ( garray[B->j[j]] > cstart) break;
836       column_values[cnt++] = B->a[j];
837     }
838     for (k=A->i[i]; k<A->i[i+1]; k++) {
839       column_values[cnt++] = A->a[k];
840     }
841     for (; j<B->i[i+1]; j++) {
842       column_values[cnt++] = B->a[j];
843     }
844   }
845   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
846 
847   /* store the column values to the file */
848   if (!rank) {
849     MPI_Status status;
850     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
851     for (i=1; i<size; i++) {
852       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
853       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
854       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
855       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
856     }
857   } else {
858     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
859     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
860   }
861   ierr = PetscFree(column_values);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
867 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
868 {
869   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
870   PetscErrorCode    ierr;
871   PetscMPIInt       rank = aij->rank,size = aij->size;
872   PetscTruth        isdraw,iascii,isbinary;
873   PetscViewer       sviewer;
874   PetscViewerFormat format;
875 
876   PetscFunctionBegin;
877   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
880   if (iascii) {
881     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
882     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
883       MatInfo    info;
884       PetscTruth inodes;
885 
886       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
887       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
888       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
889       if (!inodes) {
890         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
891 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
892       } else {
893         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
894 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
895       }
896       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
897       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
898       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
899       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
900       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
901       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
902       PetscFunctionReturn(0);
903     } else if (format == PETSC_VIEWER_ASCII_INFO) {
904       PetscInt   inodecount,inodelimit,*inodes;
905       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
906       if (inodes) {
907         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
908       } else {
909         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
910       }
911       PetscFunctionReturn(0);
912     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
913       PetscFunctionReturn(0);
914     }
915   } else if (isbinary) {
916     if (size == 1) {
917       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
918       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
919     } else {
920       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
921     }
922     PetscFunctionReturn(0);
923   } else if (isdraw) {
924     PetscDraw  draw;
925     PetscTruth isnull;
926     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
927     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
928   }
929 
930   if (size == 1) {
931     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
932     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
933   } else {
934     /* assemble the entire matrix onto first processor. */
935     Mat         A;
936     Mat_SeqAIJ  *Aloc;
937     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
938     PetscScalar *a;
939 
940     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
941     if (!rank) {
942       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
943     } else {
944       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
945     }
946     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
947     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
948     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
949     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
950 
951     /* copy over the A part */
952     Aloc = (Mat_SeqAIJ*)aij->A->data;
953     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
954     row = mat->rmap.rstart;
955     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
956     for (i=0; i<m; i++) {
957       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
958       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
959     }
960     aj = Aloc->j;
961     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
962 
963     /* copy over the B part */
964     Aloc = (Mat_SeqAIJ*)aij->B->data;
965     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
966     row  = mat->rmap.rstart;
967     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
968     ct   = cols;
969     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
970     for (i=0; i<m; i++) {
971       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
972       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
973     }
974     ierr = PetscFree(ct);CHKERRQ(ierr);
975     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     /*
978        Everyone has to make this call to draw the matrix since the graphics waits are
979        synchronized across all processors that share the PetscDraw object
980     */
981     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
982     if (!rank) {
983       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
984       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
985     }
986     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
987     ierr = MatDestroy(A);CHKERRQ(ierr);
988   }
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatView_MPIAIJ"
994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
995 {
996   PetscErrorCode ierr;
997   PetscTruth     iascii,isdraw,issocket,isbinary;
998 
999   PetscFunctionBegin;
1000   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1002   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1004   if (iascii || isdraw || isbinary || issocket) {
1005     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1006   } else {
1007     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 
1013 
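/*
   MatRelax_MPIAIJ applies SOR only locally: in each outer iteration the coupling to
   off-process unknowns is folded into the right-hand side (bb1 = bb - B*x, using the
   ghost values gathered into lvec), and the diagonal block A is then relaxed with the
   requested local sweep.  A true parallel SOR is not supported.
*/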
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatRelax_MPIAIJ"
1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1017 {
1018   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1019   PetscErrorCode ierr;
1020   Vec            bb1;
1021 
1022   PetscFunctionBegin;
1023   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1024 
1025   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1026 
1027   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1028     if (flag & SOR_ZERO_INITIAL_GUESS) {
1029       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1030       its--;
1031     }
1032 
1033     while (its--) {
1034       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1035       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1036 
1037       /* update rhs: bb1 = bb - B*x */
1038       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1039       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1040 
1041       /* local sweep */
1042       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1043       CHKERRQ(ierr);
1044     }
1045   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1046     if (flag & SOR_ZERO_INITIAL_GUESS) {
1047       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1048       its--;
1049     }
1050     while (its--) {
1051       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1052       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1053 
1054       /* update rhs: bb1 = bb - B*x */
1055       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1056       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1057 
1058       /* local sweep */
1059       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1060       CHKERRQ(ierr);
1061     }
1062   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1063     if (flag & SOR_ZERO_INITIAL_GUESS) {
1064       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1065       its--;
1066     }
1067     while (its--) {
1068       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1069       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1070 
1071       /* update rhs: bb1 = bb - B*x */
1072       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1073       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1074 
1075       /* local sweep */
1076       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1077       CHKERRQ(ierr);
1078     }
1079   } else {
1080     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1081   }
1082 
1083   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatPermute_MPIAIJ"
1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1090 {
1091   MPI_Comm       comm,pcomm;
1092   PetscInt       first,local_size,nrows,*rows;
1093   int            ntids;
1094   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1095   PetscErrorCode ierr;
1096 
1097   PetscFunctionBegin;
1098   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1099   /* make a collective version of 'rowp' */
1100   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1101   if (pcomm==comm) {
1102     crowp = rowp;
1103   } else {
1104     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1105     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1106     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1107     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1108   }
1109   /* collect the global row permutation and invert it */
1110   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1111   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1112   if (pcomm!=comm) {
1113     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1114   }
1115   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1116   /* get the local target indices */
1117   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1118   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1119   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1120   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1121   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1122   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1123   /* the column permutation is so much easier;
1124      make a local version of 'colp' and invert it */
1125   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1126   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1127   if (ntids==1) {
1128     lcolp = colp;
1129   } else {
1130     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1131     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1132     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1133   }
1134   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1135   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1136   if (ntids>1) {
1137     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1138     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1139   }
1140   /* now we just get the submatrix */
1141   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1142   /* clean up */
1143   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1144   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1151 {
1152   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1153   Mat            A = mat->A,B = mat->B;
1154   PetscErrorCode ierr;
1155   PetscReal      isend[5],irecv[5];
1156 
1157   PetscFunctionBegin;
1158   info->block_size     = 1.0;
1159   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1160   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161   isend[3] = info->memory;  isend[4] = info->mallocs;
1162   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1163   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164   isend[3] += info->memory;  isend[4] += info->mallocs;
1165   if (flag == MAT_LOCAL) {
1166     info->nz_used      = isend[0];
1167     info->nz_allocated = isend[1];
1168     info->nz_unneeded  = isend[2];
1169     info->memory       = isend[3];
1170     info->mallocs      = isend[4];
1171   } else if (flag == MAT_GLOBAL_MAX) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1173     info->nz_used      = irecv[0];
1174     info->nz_allocated = irecv[1];
1175     info->nz_unneeded  = irecv[2];
1176     info->memory       = irecv[3];
1177     info->mallocs      = irecv[4];
1178   } else if (flag == MAT_GLOBAL_SUM) {
1179     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1180     info->nz_used      = irecv[0];
1181     info->nz_allocated = irecv[1];
1182     info->nz_unneeded  = irecv[2];
1183     info->memory       = irecv[3];
1184     info->mallocs      = irecv[4];
1185   }
1186   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1187   info->fill_ratio_needed = 0;
1188   info->factor_mallocs    = 0;
1189   info->rows_global       = (double)matin->rmap.N;
1190   info->columns_global    = (double)matin->cmap.N;
1191   info->rows_local        = (double)matin->rmap.n;
1192   info->columns_local     = (double)matin->cmap.N;
1193 
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatSetOption_MPIAIJ"
1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1200 {
1201   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205   switch (op) {
1206   case MAT_NO_NEW_NONZERO_LOCATIONS:
1207   case MAT_YES_NEW_NONZERO_LOCATIONS:
1208   case MAT_COLUMNS_UNSORTED:
1209   case MAT_COLUMNS_SORTED:
1210   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1211   case MAT_KEEP_ZEROED_ROWS:
1212   case MAT_NEW_NONZERO_LOCATION_ERR:
1213   case MAT_USE_INODES:
1214   case MAT_DO_NOT_USE_INODES:
1215   case MAT_IGNORE_ZERO_ENTRIES:
1216     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1217     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1218     break;
1219   case MAT_ROW_ORIENTED:
1220     a->roworiented = PETSC_TRUE;
1221     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1222     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1223     break;
1224   case MAT_ROWS_SORTED:
1225   case MAT_ROWS_UNSORTED:
1226   case MAT_YES_NEW_DIAGONALS:
1227     ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
1228     break;
1229   case MAT_COLUMN_ORIENTED:
1230     a->roworiented = PETSC_FALSE;
1231     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1232     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1233     break;
1234   case MAT_IGNORE_OFF_PROC_ENTRIES:
1235     a->donotstash = PETSC_TRUE;
1236     break;
1237   case MAT_NO_NEW_DIAGONALS:
1238     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1239   case MAT_SYMMETRIC:
1240     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1241     break;
1242   case MAT_STRUCTURALLY_SYMMETRIC:
1243   case MAT_HERMITIAN:
1244   case MAT_SYMMETRY_ETERNAL:
1245     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1246     break;
1247   case MAT_NOT_SYMMETRIC:
1248   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1249   case MAT_NOT_HERMITIAN:
1250   case MAT_NOT_SYMMETRY_ETERNAL:
1251     break;
1252   default:
1253     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatGetRow_MPIAIJ"
1260 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1261 {
1262   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1263   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1264   PetscErrorCode ierr;
1265   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1266   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1267   PetscInt       *cmap,*idx_p;
1268 
1269   PetscFunctionBegin;
1270   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1271   mat->getrowactive = PETSC_TRUE;
1272 
1273   if (!mat->rowvalues && (idx || v)) {
1274     /*
1275         allocate enough space to hold information from the longest row.
1276     */
1277     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1278     PetscInt     max = 1,tmp;
1279     for (i=0; i<matin->rmap.n; i++) {
1280       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1281       if (max < tmp) { max = tmp; }
1282     }
1283     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1284     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1285   }
1286 
1287   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1288   lrow = row - rstart;
1289 
1290   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1291   if (!v)   {pvA = 0; pvB = 0;}
1292   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1293   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1294   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1295   nztot = nzA + nzB;
1296 
1297   cmap  = mat->garray;
1298   if (v  || idx) {
1299     if (nztot) {
1300       /* Sort by increasing column numbers, assuming A and B already sorted */
1301       PetscInt imark = -1;
1302       if (v) {
1303         *v = v_p = mat->rowvalues;
1304         for (i=0; i<nzB; i++) {
1305           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1306           else break;
1307         }
1308         imark = i;
1309         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1310         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1311       }
1312       if (idx) {
1313         *idx = idx_p = mat->rowindices;
1314         if (imark > -1) {
1315           for (i=0; i<imark; i++) {
1316             idx_p[i] = cmap[cworkB[i]];
1317           }
1318         } else {
1319           for (i=0; i<nzB; i++) {
1320             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1321             else break;
1322           }
1323           imark = i;
1324         }
1325         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1326         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1327       }
1328     } else {
1329       if (idx) *idx = 0;
1330       if (v)   *v   = 0;
1331     }
1332   }
1333   *nz = nztot;
1334   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1335   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1341 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1342 {
1343   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1344 
1345   PetscFunctionBegin;
1346   if (!aij->getrowactive) {
1347     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1348   }
1349   aij->getrowactive = PETSC_FALSE;
1350   PetscFunctionReturn(0);
1351 }
1352 
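/*
   Usage sketch (illustrative only; A and row are placeholders): the two routines above
   implement the public MatGetRow()/MatRestoreRow() interface for MPIAIJ matrices, which
   only serves rows owned by the calling process and allows one active row at a time:

     PetscInt       ncols,*cols;
     PetscScalar    *vals;
     PetscErrorCode ierr;

     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     /* read cols[0..ncols-1] and vals[0..ncols-1]; do not modify or free them */
     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/
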
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatNorm_MPIAIJ"
1355 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1356 {
1357   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1358   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1359   PetscErrorCode ierr;
1360   PetscInt       i,j,cstart = mat->cmap.rstart;
1361   PetscReal      sum = 0.0;
1362   PetscScalar    *v;
1363 
1364   PetscFunctionBegin;
1365   if (aij->size == 1) {
1366     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1367   } else {
1368     if (type == NORM_FROBENIUS) {
1369       v = amat->a;
1370       for (i=0; i<amat->nz; i++) {
1371 #if defined(PETSC_USE_COMPLEX)
1372         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1373 #else
1374         sum += (*v)*(*v); v++;
1375 #endif
1376       }
1377       v = bmat->a;
1378       for (i=0; i<bmat->nz; i++) {
1379 #if defined(PETSC_USE_COMPLEX)
1380         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1381 #else
1382         sum += (*v)*(*v); v++;
1383 #endif
1384       }
1385       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1386       *norm = sqrt(*norm);
1387     } else if (type == NORM_1) { /* max column norm */
1388       PetscReal *tmp,*tmp2;
1389       PetscInt    *jj,*garray = aij->garray;
1390       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1391       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1392       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1393       *norm = 0.0;
1394       v = amat->a; jj = amat->j;
1395       for (j=0; j<amat->nz; j++) {
1396         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1397       }
1398       v = bmat->a; jj = bmat->j;
1399       for (j=0; j<bmat->nz; j++) {
1400         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1401       }
1402       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1403       for (j=0; j<mat->cmap.N; j++) {
1404         if (tmp2[j] > *norm) *norm = tmp2[j];
1405       }
1406       ierr = PetscFree(tmp);CHKERRQ(ierr);
1407       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1408     } else if (type == NORM_INFINITY) { /* max row norm */
1409       PetscReal ntemp = 0.0;
1410       for (j=0; j<aij->A->rmap.n; j++) {
1411         v = amat->a + amat->i[j];
1412         sum = 0.0;
1413         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1414           sum += PetscAbsScalar(*v); v++;
1415         }
1416         v = bmat->a + bmat->i[j];
1417         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1418           sum += PetscAbsScalar(*v); v++;
1419         }
1420         if (sum > ntemp) ntemp = sum;
1421       }
1422       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1423     } else {
1424       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1425     }
1426   }
1427   PetscFunctionReturn(0);
1428 }
1429 
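/*
   Usage sketch (illustrative only; A is a placeholder for an assembled MPIAIJ matrix):
   the norms handled above are reached through the public MatNorm() interface, e.g.

     PetscReal nrm;

     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);          (largest column sum)
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);   (largest row sum)

   NORM_2 is not supported for this matrix type, as the error branch above indicates.
*/
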
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatTranspose_MPIAIJ"
1432 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1433 {
1434   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1435   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1436   PetscErrorCode ierr;
1437   PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1438   Mat            B;
1439   PetscScalar    *array;
1440 
1441   PetscFunctionBegin;
1442   if (!matout && M != N) {
1443     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1444   }
1445 
1446   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1447   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1448   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1449   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1450 
1451   /* copy over the A part */
1452   Aloc = (Mat_SeqAIJ*)a->A->data;
1453   m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1454   row = A->rmap.rstart;
1455   for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;}
1456   for (i=0; i<m; i++) {
1457     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1458     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1459   }
1460   aj = Aloc->j;
1461   for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;}
1462 
1463   /* copy over the B part */
1464   Aloc = (Mat_SeqAIJ*)a->B->data;
1465   m = a->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1466   row  = A->rmap.rstart;
1467   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1468   ct   = cols;
1469   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1470   for (i=0; i<m; i++) {
1471     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1472     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1473   }
1474   ierr = PetscFree(ct);CHKERRQ(ierr);
1475   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1476   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1477   if (matout) {
1478     *matout = B;
1479   } else {
1480     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1481   }
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1487 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1488 {
1489   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1490   Mat            a = aij->A,b = aij->B;
1491   PetscErrorCode ierr;
1492   PetscInt       s1,s2,s3;
1493 
1494   PetscFunctionBegin;
1495   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1496   if (rr) {
1497     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1498     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1499     /* Overlap communication with computation. */
1500     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1501   }
1502   if (ll) {
1503     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1504     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1505     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1506   }
1507   /* scale  the diagonal block */
1508   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1509 
1510   if (rr) {
1511     /* Do a scatter end and then right scale the off-diagonal block */
1512     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1513     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1514   }
1515 
1516   PetscFunctionReturn(0);
1517 }
1518 
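/*
   Usage sketch (illustrative only; A, l, and r are placeholders): the public
   MatDiagonalScale() that dispatches here computes A <- diag(ll)*A*diag(rr); either
   vector may be PETSC_NULL to skip that side of the scaling.  l must have the local
   row layout of A and r the local column layout of A:

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);            row and column scaling
     ierr = MatDiagonalScale(A,PETSC_NULL,r);CHKERRQ(ierr);   column scaling only
*/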
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1522 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1523 {
1524   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1525   PetscErrorCode ierr;
1526 
1527   PetscFunctionBegin;
1528   if (!a->rank) {
1529     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1530   }
1531   PetscFunctionReturn(0);
1532 }
1533 
1534 #undef __FUNCT__
1535 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1536 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1537 {
1538   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1539   PetscErrorCode ierr;
1540 
1541   PetscFunctionBegin;
1542   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1543   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1544   PetscFunctionReturn(0);
1545 }
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1548 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1549 {
1550   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1551   PetscErrorCode ierr;
1552 
1553   PetscFunctionBegin;
1554   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatEqual_MPIAIJ"
1560 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1561 {
1562   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1563   Mat            a,b,c,d;
1564   PetscTruth     flg;
1565   PetscErrorCode ierr;
1566 
1567   PetscFunctionBegin;
1568   a = matA->A; b = matA->B;
1569   c = matB->A; d = matB->B;
1570 
1571   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1572   if (flg) {
1573     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1574   }
1575   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatCopy_MPIAIJ"
1581 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1582 {
1583   PetscErrorCode ierr;
1584   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1585   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1586 
1587   PetscFunctionBegin;
1588   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1589   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1590     /* because of the column compression in the off-processor part of the matrix a->B,
1591        the number of columns in a->B and b->B may differ, hence we cannot call
1592        MatCopy() directly on the two parts. If need be, a copy more efficient than
1593        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1594        then copying the submatrices */
1595     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1596   } else {
1597     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1598     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1599   }
1600   PetscFunctionReturn(0);
1601 }
1602 
1603 #undef __FUNCT__
1604 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1605 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1606 {
1607   PetscErrorCode ierr;
1608 
1609   PetscFunctionBegin;
1610   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1611   PetscFunctionReturn(0);
1612 }
1613 
1614 #include "petscblaslapack.h"
1615 #undef __FUNCT__
1616 #define __FUNCT__ "MatAXPY_MPIAIJ"
1617 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1618 {
1619   PetscErrorCode ierr;
1620   PetscInt       i;
1621   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1622   PetscBLASInt   bnz,one=1;
1623   Mat_SeqAIJ     *x,*y;
1624 
1625   PetscFunctionBegin;
1626   if (str == SAME_NONZERO_PATTERN) {
1627     PetscScalar alpha = a;
1628     x = (Mat_SeqAIJ *)xx->A->data;
1629     y = (Mat_SeqAIJ *)yy->A->data;
1630     bnz = (PetscBLASInt)x->nz;
1631     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1632     x = (Mat_SeqAIJ *)xx->B->data;
1633     y = (Mat_SeqAIJ *)yy->B->data;
1634     bnz = (PetscBLASInt)x->nz;
1635     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1636   } else if (str == SUBSET_NONZERO_PATTERN) {
1637     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1638 
1639     x = (Mat_SeqAIJ *)xx->B->data;
1640     y = (Mat_SeqAIJ *)yy->B->data;
1641     if (y->xtoy && y->XtoY != xx->B) {
1642       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1643       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1644     }
1645     if (!y->xtoy) { /* get xtoy */
1646       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1647       y->XtoY = xx->B;
1648     }
1649     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1650   } else {
1651     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1652   }
1653   PetscFunctionReturn(0);
1654 }
1655 
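/*
   Usage sketch (illustrative only; X, Y, and alpha are placeholders): the public
   MatAXPY() that dispatches here computes Y <- Y + alpha*X; the MatStructure flag
   selects the code path above:

     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);       BLAS axpy on the stored values
     ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);  falls back to MatAXPY_Basic()
*/
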
1656 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1657 
1658 #undef __FUNCT__
1659 #define __FUNCT__ "MatConjugate_MPIAIJ"
1660 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1661 {
1662 #if defined(PETSC_USE_COMPLEX)
1663   PetscErrorCode ierr;
1664   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1665 
1666   PetscFunctionBegin;
1667   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1668   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1669 #else
1670   PetscFunctionBegin;
1671 #endif
1672   PetscFunctionReturn(0);
1673 }
1674 
1675 #undef __FUNCT__
1676 #define __FUNCT__ "MatRealPart_MPIAIJ"
1677 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1678 {
1679   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1680   PetscErrorCode ierr;
1681 
1682   PetscFunctionBegin;
1683   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1684   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1690 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1691 {
1692   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1693   PetscErrorCode ierr;
1694 
1695   PetscFunctionBegin;
1696   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1697   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1698   PetscFunctionReturn(0);
1699 }
1700 
1701 /* -------------------------------------------------------------------*/
1702 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1703        MatGetRow_MPIAIJ,
1704        MatRestoreRow_MPIAIJ,
1705        MatMult_MPIAIJ,
1706 /* 4*/ MatMultAdd_MPIAIJ,
1707        MatMultTranspose_MPIAIJ,
1708        MatMultTransposeAdd_MPIAIJ,
1709        0,
1710        0,
1711        0,
1712 /*10*/ 0,
1713        0,
1714        0,
1715        MatRelax_MPIAIJ,
1716        MatTranspose_MPIAIJ,
1717 /*15*/ MatGetInfo_MPIAIJ,
1718        MatEqual_MPIAIJ,
1719        MatGetDiagonal_MPIAIJ,
1720        MatDiagonalScale_MPIAIJ,
1721        MatNorm_MPIAIJ,
1722 /*20*/ MatAssemblyBegin_MPIAIJ,
1723        MatAssemblyEnd_MPIAIJ,
1724        0,
1725        MatSetOption_MPIAIJ,
1726        MatZeroEntries_MPIAIJ,
1727 /*25*/ MatZeroRows_MPIAIJ,
1728        0,
1729        0,
1730        0,
1731        0,
1732 /*30*/ MatSetUpPreallocation_MPIAIJ,
1733        0,
1734        0,
1735        0,
1736        0,
1737 /*35*/ MatDuplicate_MPIAIJ,
1738        0,
1739        0,
1740        0,
1741        0,
1742 /*40*/ MatAXPY_MPIAIJ,
1743        MatGetSubMatrices_MPIAIJ,
1744        MatIncreaseOverlap_MPIAIJ,
1745        MatGetValues_MPIAIJ,
1746        MatCopy_MPIAIJ,
1747 /*45*/ MatPrintHelp_MPIAIJ,
1748        MatScale_MPIAIJ,
1749        0,
1750        0,
1751        0,
1752 /*50*/ MatSetBlockSize_MPIAIJ,
1753        0,
1754        0,
1755        0,
1756        0,
1757 /*55*/ MatFDColoringCreate_MPIAIJ,
1758        0,
1759        MatSetUnfactored_MPIAIJ,
1760        MatPermute_MPIAIJ,
1761        0,
1762 /*60*/ MatGetSubMatrix_MPIAIJ,
1763        MatDestroy_MPIAIJ,
1764        MatView_MPIAIJ,
1765        0,
1766        0,
1767 /*65*/ 0,
1768        0,
1769        0,
1770        0,
1771        0,
1772 /*70*/ 0,
1773        0,
1774        MatSetColoring_MPIAIJ,
1775 #if defined(PETSC_HAVE_ADIC)
1776        MatSetValuesAdic_MPIAIJ,
1777 #else
1778        0,
1779 #endif
1780        MatSetValuesAdifor_MPIAIJ,
1781 /*75*/ 0,
1782        0,
1783        0,
1784        0,
1785        0,
1786 /*80*/ 0,
1787        0,
1788        0,
1789        0,
1790 /*84*/ MatLoad_MPIAIJ,
1791        0,
1792        0,
1793        0,
1794        0,
1795        0,
1796 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1797        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1798        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1799        MatPtAP_Basic,
1800        MatPtAPSymbolic_MPIAIJ,
1801 /*95*/ MatPtAPNumeric_MPIAIJ,
1802        0,
1803        0,
1804        0,
1805        0,
1806 /*100*/0,
1807        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1808        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1809        MatConjugate_MPIAIJ,
1810        0,
1811 /*105*/MatSetValuesRow_MPIAIJ,
1812        MatRealPart_MPIAIJ,
1813        MatImaginaryPart_MPIAIJ};
1814 
1815 /* ----------------------------------------------------------------------------------------*/
1816 
1817 EXTERN_C_BEGIN
1818 #undef __FUNCT__
1819 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1820 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1821 {
1822   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1823   PetscErrorCode ierr;
1824 
1825   PetscFunctionBegin;
1826   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1827   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1828   PetscFunctionReturn(0);
1829 }
1830 EXTERN_C_END
1831 
1832 EXTERN_C_BEGIN
1833 #undef __FUNCT__
1834 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1835 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1836 {
1837   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1838   PetscErrorCode ierr;
1839 
1840   PetscFunctionBegin;
1841   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1842   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 EXTERN_C_END
1846 
1847 #include "petscpc.h"
1848 EXTERN_C_BEGIN
1849 #undef __FUNCT__
1850 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1851 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1852 {
1853   Mat_MPIAIJ     *b;
1854   PetscErrorCode ierr;
1855   PetscInt       i;
1856 
1857   PetscFunctionBegin;
1858   B->preallocated = PETSC_TRUE;
1859   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1860   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1861   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1862   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1863 
1864   B->rmap.bs = B->cmap.bs = 1;
1865   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1866   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1867   if (d_nnz) {
1868     for (i=0; i<B->rmap.n; i++) {
1869       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1870     }
1871   }
1872   if (o_nnz) {
1873     for (i=0; i<B->rmap.n; i++) {
1874       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1875     }
1876   }
1877   b = (Mat_MPIAIJ*)B->data;
1878 
1879   /* Explicitly create 2 MATSEQAIJ matrices. */
1880   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1881   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1882   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1883   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1884   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1885   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1886   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1887   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1888 
1889   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1890   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1891 
1892   PetscFunctionReturn(0);
1893 }
1894 EXTERN_C_END
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1898 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1899 {
1900   Mat            mat;
1901   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1902   PetscErrorCode ierr;
1903 
1904   PetscFunctionBegin;
1905   *newmat       = 0;
1906   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1907   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
1908   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1909   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1910   a    = (Mat_MPIAIJ*)mat->data;
1911 
1912   mat->factor       = matin->factor;
1913   mat->rmap.bs      = matin->rmap.bs;
1914   mat->assembled    = PETSC_TRUE;
1915   mat->insertmode   = NOT_SET_VALUES;
1916   mat->preallocated = PETSC_TRUE;
1917 
1918   a->size           = oldmat->size;
1919   a->rank           = oldmat->rank;
1920   a->donotstash     = oldmat->donotstash;
1921   a->roworiented    = oldmat->roworiented;
1922   a->rowindices     = 0;
1923   a->rowvalues      = 0;
1924   a->getrowactive   = PETSC_FALSE;
1925 
1926   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
1927   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
1928 
1929   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1930   if (oldmat->colmap) {
1931 #if defined (PETSC_USE_CTABLE)
1932     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1933 #else
1934     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1935     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
1936     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
1937 #endif
1938   } else a->colmap = 0;
1939   if (oldmat->garray) {
1940     PetscInt len;
1941     len  = oldmat->B->cmap.n;
1942     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1943     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1944     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1945   } else a->garray = 0;
1946 
1947   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1948   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1949   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1950   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1951   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1952   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1953   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1954   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1955   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1956   *newmat = mat;
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #include "petscsys.h"
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatLoad_MPIAIJ"
1964 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
1965 {
1966   Mat            A;
1967   PetscScalar    *vals,*svals;
1968   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1969   MPI_Status     status;
1970   PetscErrorCode ierr;
1971   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1972   PetscInt       i,nz,j,rstart,rend,mmax;
1973   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1974   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1975   PetscInt       cend,cstart,n,*rowners;
1976   int            fd;
1977 
1978   PetscFunctionBegin;
1979   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1980   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1981   if (!rank) {
1982     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1983     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1984     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1985   }
1986 
1987   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1988   M = header[1]; N = header[2];
1989   /* determine ownership of all rows */
1990   m    = M/size + ((M % size) > rank);
1991   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1992   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1993 
1994   /* First process needs enough room for process with most rows */
1995   if (!rank) {
1996     mmax       = rowners[1];
1997     for (i=2; i<size; i++) {
1998       mmax = PetscMax(mmax,rowners[i]);
1999     }
2000   } else mmax = m;
2001 
2002   rowners[0] = 0;
2003   for (i=2; i<=size; i++) {
2004     mmax       = PetscMax(mmax,rowners[i]);
2005     rowners[i] += rowners[i-1];
2006   }
2007   rstart = rowners[rank];
2008   rend   = rowners[rank+1];
2009 
2010   /* distribute row lengths to all processors */
2011   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2012   if (!rank) {
2013     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2014     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2015     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2016     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2017     for (j=0; j<m; j++) {
2018       procsnz[0] += ourlens[j];
2019     }
2020     for (i=1; i<size; i++) {
2021       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2022       /* calculate the number of nonzeros on each processor */
2023       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2024         procsnz[i] += rowlengths[j];
2025       }
2026       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2027     }
2028     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2029   } else {
2030     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2031   }
2032 
2033   if (!rank) {
2034     /* determine max buffer needed and allocate it */
2035     maxnz = 0;
2036     for (i=0; i<size; i++) {
2037       maxnz = PetscMax(maxnz,procsnz[i]);
2038     }
2039     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2040 
2041     /* read in my part of the matrix column indices  */
2042     nz   = procsnz[0];
2043     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2044     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2045 
2046     /* read in everyone else's part and ship it off */
2047     for (i=1; i<size; i++) {
2048       nz   = procsnz[i];
2049       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2050       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2051     }
2052     ierr = PetscFree(cols);CHKERRQ(ierr);
2053   } else {
2054     /* determine buffer space needed for message */
2055     nz = 0;
2056     for (i=0; i<m; i++) {
2057       nz += ourlens[i];
2058     }
2059     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2060 
2061     /* receive message of column indices*/
2062     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2063     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2064     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2065   }
2066 
2067   /* determine column ownership if matrix is not square */
2068   if (N != M) {
2069     n      = N/size + ((N % size) > rank);
2070     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2071     cstart = cend - n;
2072   } else {
2073     cstart = rstart;
2074     cend   = rend;
2075     n      = cend - cstart;
2076   }
2077 
2078   /* loop over local rows, determining number of off diagonal entries */
2079   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2080   jj = 0;
2081   for (i=0; i<m; i++) {
2082     for (j=0; j<ourlens[i]; j++) {
2083       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2084       jj++;
2085     }
2086   }
2087 
2088   /* create our matrix */
2089   for (i=0; i<m; i++) {
2090     ourlens[i] -= offlens[i];
2091   }
2092   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2093   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2094   ierr = MatSetType(A,type);CHKERRQ(ierr);
2095   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2096 
2097   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2098   for (i=0; i<m; i++) {
2099     ourlens[i] += offlens[i];
2100   }
2101 
2102   if (!rank) {
2103     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2104 
2105     /* read in my part of the matrix numerical values  */
2106     nz   = procsnz[0];
2107     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2108 
2109     /* insert into matrix */
2110     jj      = rstart;
2111     smycols = mycols;
2112     svals   = vals;
2113     for (i=0; i<m; i++) {
2114       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2115       smycols += ourlens[i];
2116       svals   += ourlens[i];
2117       jj++;
2118     }
2119 
2120     /* read in other processors and ship out */
2121     for (i=1; i<size; i++) {
2122       nz   = procsnz[i];
2123       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2124       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2125     }
2126     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2127   } else {
2128     /* receive numeric values */
2129     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2130 
2131     /* receive message of values*/
2132     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2133     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2134     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2135 
2136     /* insert into matrix */
2137     jj      = rstart;
2138     smycols = mycols;
2139     svals   = vals;
2140     for (i=0; i<m; i++) {
2141       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2142       smycols += ourlens[i];
2143       svals   += ourlens[i];
2144       jj++;
2145     }
2146   }
2147   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2148   ierr = PetscFree(vals);CHKERRQ(ierr);
2149   ierr = PetscFree(mycols);CHKERRQ(ierr);
2150   ierr = PetscFree(rowners);CHKERRQ(ierr);
2151 
2152   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2153   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2154   *newmat = A;
2155   PetscFunctionReturn(0);
2156 }
2157 
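/*
   Usage sketch (illustrative only; comm is a placeholder and "matrix.dat" stands for a
   file previously written with MatView() on a binary viewer): the loader above is
   reached through the generic MatLoad() interface as

     PetscViewer viewer;
     Mat         A;

     ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/
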
2158 #undef __FUNCT__
2159 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2160 /*
2161     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
2162   process, and then the final result obtained by concatenating the local matrices.
2163   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2164 */
2165 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2166 {
2167   PetscErrorCode ierr;
2168   PetscMPIInt    rank,size;
2169   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2170   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2171   Mat            *local,M,Mreuse;
2172   PetscScalar    *vwork,*aa;
2173   MPI_Comm       comm = mat->comm;
2174   Mat_SeqAIJ     *aij;
2175 
2176 
2177   PetscFunctionBegin;
2178   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2179   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2180 
2181   if (call ==  MAT_REUSE_MATRIX) {
2182     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2183     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2184     local = &Mreuse;
2185     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2186   } else {
2187     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2188     Mreuse = *local;
2189     ierr   = PetscFree(local);CHKERRQ(ierr);
2190   }
2191 
2192   /*
2193       m - number of local rows
2194       n - number of columns (same on all processors)
2195       rstart - first row in new global matrix generated
2196   */
2197   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2198   if (call == MAT_INITIAL_MATRIX) {
2199     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2200     ii  = aij->i;
2201     jj  = aij->j;
2202 
2203     /*
2204         Determine the number of non-zeros in the diagonal and off-diagonal
2205         portions of the matrix in order to do correct preallocation
2206     */
2207 
2208     /* first get start and end of "diagonal" columns */
2209     if (csize == PETSC_DECIDE) {
2210       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2211       if (mglobal == n) { /* square matrix */
2212 	nlocal = m;
2213       } else {
2214         nlocal = n/size + ((n % size) > rank);
2215       }
2216     } else {
2217       nlocal = csize;
2218     }
2219     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2220     rstart = rend - nlocal;
2221     if (rank == size - 1 && rend != n) {
2222       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2223     }
2224 
2225     /* next, compute all the lengths */
2226     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2227     olens = dlens + m;
2228     for (i=0; i<m; i++) {
2229       jend = ii[i+1] - ii[i];
2230       olen = 0;
2231       dlen = 0;
2232       for (j=0; j<jend; j++) {
2233         if (*jj < rstart || *jj >= rend) olen++;
2234         else dlen++;
2235         jj++;
2236       }
2237       olens[i] = olen;
2238       dlens[i] = dlen;
2239     }
2240     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2241     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2242     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2243     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2244     ierr = PetscFree(dlens);CHKERRQ(ierr);
2245   } else {
2246     PetscInt ml,nl;
2247 
2248     M = *newmat;
2249     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2250     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2251     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2252     /*
2253          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2254        rather than the slower MatSetValues().
2255     */
2256     M->was_assembled = PETSC_TRUE;
2257     M->assembled     = PETSC_FALSE;
2258   }
2259   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2260   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2261   ii  = aij->i;
2262   jj  = aij->j;
2263   aa  = aij->a;
2264   for (i=0; i<m; i++) {
2265     row   = rstart + i;
2266     nz    = ii[i+1] - ii[i];
2267     cwork = jj;     jj += nz;
2268     vwork = aa;     aa += nz;
2269     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2270   }
2271 
2272   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2273   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2274   *newmat = M;
2275 
2276   /* save submatrix used in processor for next request */
2277   if (call ==  MAT_INITIAL_MATRIX) {
2278     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2279     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2280   }
2281 
2282   PetscFunctionReturn(0);
2283 }
2284 
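/*
   Usage sketch (illustrative only; comm, nrows, firstrow, ncols, and firstcol are
   placeholders for the contiguous global rows/columns this process wants locally):
   the routine above is reached through the public MatGetSubMatrix() interface as

     IS  isrow,iscol;
     Mat sub;

     ierr = ISCreateStride(comm,nrows,firstrow,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(comm,ncols,firstcol,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/
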
2285 EXTERN_C_BEGIN
2286 #undef __FUNCT__
2287 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2288 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2289 {
2290   PetscInt       m,cstart, cend,j,nnz,i,d;
2291   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2292   const PetscInt *JJ;
2293   PetscScalar    *values;
2294   PetscErrorCode ierr;
2295 
2296   PetscFunctionBegin;
2297   if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]);
2298 
2299   B->rmap.bs = B->cmap.bs = 1;
2300   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2301   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2302   m      = B->rmap.n;
2303   cstart = B->cmap.rstart;
2304   cend   = B->cmap.rend;
2305   rstart = B->rmap.rstart;
2306 
2307   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2308   o_nnz = d_nnz + m;
2309 
2310   for (i=0; i<m; i++) {
2311     nnz     = I[i+1]- I[i];
2312     JJ      = J + I[i];
2313     nnz_max = PetscMax(nnz_max,nnz);
2314     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2315     for (j=0; j<nnz; j++) {
2316       if (*JJ >= cstart) break;
2317       JJ++;
2318     }
2319     d = 0;
2320     for (; j<nnz; j++) {
2321       if (*JJ++ >= cend) break;
2322       d++;
2323     }
2324     d_nnz[i] = d;
2325     o_nnz[i] = nnz - d;
2326   }
2327   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2328   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2329 
2330   if (v) values = (PetscScalar*)v;
2331   else {
2332     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2333     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2334   }
2335 
2336   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2337   for (i=0; i<m; i++) {
2338     ii   = i + rstart;
2339     nnz  = I[i+1]- I[i];
2340     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2341   }
2342   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2343   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2344   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2345 
2346   if (!v) {
2347     ierr = PetscFree(values);CHKERRQ(ierr);
2348   }
2349   PetscFunctionReturn(0);
2350 }
2351 EXTERN_C_END
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2355 /*@
2356    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2357    (the default parallel PETSc format).
2358 
2359    Collective on MPI_Comm
2360 
2361    Input Parameters:
2362 +  B - the matrix
2363 .  i - the indices into j for the start of each local row (starts with zero)
2364 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2365 -  v - optional values in the matrix
2366 
2367    Level: developer
2368 
2369    Notes: this actually copies the values from i[], j[], and v[] to put them into PETSc's internal
2370      storage format. Thus changing the values in v[] after this call will not affect the matrix values.
2371 
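   Example usage (a minimal sketch; comm, m, and n are placeholders for the communicator
   and local sizes, and i[], j[], v[] are the caller's 0-based CSR arrays for its local rows):

.vb
   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
   ierr = MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
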
2372 .keywords: matrix, aij, compressed row, sparse, parallel
2373 
2374 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2375           MatCreateSeqAIJWithArrays()
2376 @*/
2377 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2378 {
2379   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2380 
2381   PetscFunctionBegin;
2382   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2383   if (f) {
2384     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2385   }
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 #undef __FUNCT__
2390 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2391 /*@C
2392    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2393    (the default parallel PETSc format).  For good matrix assembly performance
2394    the user should preallocate the matrix storage by setting the parameters
2395    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2396    performance can be increased by more than a factor of 50.
2397 
2398    Collective on MPI_Comm
2399 
2400    Input Parameters:
2401 +  A - the matrix
2402 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2403            (same value is used for all local rows)
2404 .  d_nnz - array containing the number of nonzeros in the various rows of the
2405            DIAGONAL portion of the local submatrix (possibly different for each row)
2406            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2407            The size of this array is equal to the number of local rows, i.e 'm'.
2408            You must leave room for the diagonal entry even if it is zero.
2409 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2410            submatrix (same value is used for all local rows).
2411 -  o_nnz - array containing the number of nonzeros in the various rows of the
2412            OFF-DIAGONAL portion of the local submatrix (possibly different for
2413            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2414            structure. The size of this array is equal to the number
2415            of local rows, i.e 'm'.
2416 
2417    If the *_nnz parameter is given then the *_nz parameter is ignored
2418 
2419    The AIJ format (also called the Yale sparse matrix format or
2420    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2421    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2422 
2423    The parallel matrix is partitioned such that the first m0 rows belong to
2424    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2425    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2426 
2427    The DIAGONAL portion of the local submatrix of a processor can be defined
2428    as the submatrix which is obtained by extracting the part corresponding
2429    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2430    first row that belongs to the processor, and r2 is the last row belonging
2431    to this processor. This is a square mxm matrix. The remaining portion
2432    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2433 
2434    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2435 
2436    Example usage:
2437 
2438    Consider the following 8x8 matrix with 34 non-zero values, that is
2439    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2440    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2441    as follows:
2442 
2443 .vb
2444             1  2  0  |  0  3  0  |  0  4
2445     Proc0   0  5  6  |  7  0  0  |  8  0
2446             9  0 10  | 11  0  0  | 12  0
2447     -------------------------------------
2448            13  0 14  | 15 16 17  |  0  0
2449     Proc1   0 18  0  | 19 20 21  |  0  0
2450             0  0  0  | 22 23  0  | 24  0
2451     -------------------------------------
2452     Proc2  25 26 27  |  0  0 28  | 29  0
2453            30  0  0  | 31 32 33  |  0 34
2454 .ve
2455 
2456    This can be represented as a collection of submatrices as:
2457 
2458 .vb
2459       A B C
2460       D E F
2461       G H I
2462 .ve
2463 
2464    Where the submatrices A,B,C are owned by proc0, D,E,F are
2465    owned by proc1, G,H,I are owned by proc2.
2466 
2467    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2468    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2469    The 'M','N' parameters are 8,8, and have the same values on all procs.
2470 
2471    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2472    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2473    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2474    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2475    part as SeqAIJ matrices; e.g. proc1 will store [E] as one SeqAIJ
2476    matrix, and [DF] as another SeqAIJ matrix.
2477 
2478    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2479    allocated for every row of the local diagonal submatrix, and o_nz
2480    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2481    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2482    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2483    In this case, the values of d_nz,o_nz are:
2484 .vb
2485      proc0 : dnz = 2, o_nz = 2
2486      proc1 : dnz = 3, o_nz = 2
2487      proc2 : dnz = 1, o_nz = 4
2488 .ve
2489    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2490    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2491    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2492    34 values.
2493 
2494    When the d_nnz, o_nnz parameters are specified, the storage is specified
2495    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2496    In the above case the values for d_nnz,o_nnz are:
2497 .vb
2498      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2499      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2500      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2501 .ve
2502    Here the space allocated is the sum of all the above values, i.e. 34, and
2503    hence the preallocation is perfect.
2504 
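   A minimal call-sequence sketch for the example above (comm is a placeholder for the
   communicator; m,n,M,N are the sizes listed above and d_nnz,o_nnz the per-row arrays
   for this process):

.vb
   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
   ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
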
2505    Level: intermediate
2506 
2507 .keywords: matrix, aij, compressed row, sparse, parallel
2508 
2509 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2510           MPIAIJ
2511 @*/
2512 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2513 {
2514   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2515 
2516   PetscFunctionBegin;
2517   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2518   if (f) {
2519     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2520   }
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 #undef __FUNCT__
2525 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
2526 /*@C
2527      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
2528          CSR format the local rows.
2529 
2530    Collective on MPI_Comm
2531 
2532    Input Parameters:
2533 +  comm - MPI communicator
2534 .  m - number of local rows (Cannot be PETSC_DECIDE)
2535 .  n - This value should be the same as the local size used in creating the
2536        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2537        calculated if N is given). For square matrices n is almost always m.
2538 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2539 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2540 .   i - row indices
2541 .   j - column indices
2542 -   a - matrix values
2543 
2544    Output Parameter:
2545 .   mat - the matrix
2546    Level: intermediate
2547 
2548    Notes:
2549        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2550      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2551      called this routine.
2552 
2553        The i and j indices are 0 based
2554 
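   A minimal sketch of the intended call (comm, m, and n are placeholders, and i[], j[], a[]
   hold this process' local rows in 0-based CSR form):

.vb
   ierr = MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);CHKERRQ(ierr);
.ve

   The matrix is assembled on return and ready for use (e.g. in MatMult()).
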
2555 .keywords: matrix, aij, compressed row, sparse, parallel
2556 
2557 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2558           MPIAIJ, MatCreateMPIAIJ()
2559 @*/
2560 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2561 {
2562   PetscErrorCode ierr;
2563 
2564   PetscFunctionBegin;
2565   if (i[0]) {
2566     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2567   }
2568   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2569   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
       ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2570   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
2571   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
2572   PetscFunctionReturn(0);
2573 }
2574 
2575 #undef __FUNCT__
2576 #define __FUNCT__ "MatCreateMPIAIJ"
2577 /*@C
2578    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2579    (the default parallel PETSc format).  For good matrix assembly performance
2580    the user should preallocate the matrix storage by setting the parameters
2581    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2582    performance can be increased by more than a factor of 50.
2583 
2584    Collective on MPI_Comm
2585 
2586    Input Parameters:
2587 +  comm - MPI communicator
2588 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2589            This value should be the same as the local size used in creating the
2590            y vector for the matrix-vector product y = Ax.
2591 .  n - This value should be the same as the local size used in creating the
2592        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2593        calculated if N is given). For square matrices n is almost always m.
2594 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2595 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2596 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2597            (same value is used for all local rows)
2598 .  d_nnz - array containing the number of nonzeros in the various rows of the
2599            DIAGONAL portion of the local submatrix (possibly different for each row)
2600            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2601            The size of this array is equal to the number of local rows, i.e 'm'.
2602            You must leave room for the diagonal entry even if it is zero.
2603 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2604            submatrix (same value is used for all local rows).
2605 -  o_nnz - array containing the number of nonzeros in the various rows of the
2606            OFF-DIAGONAL portion of the local submatrix (possibly different for
2607            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2608            structure. The size of this array is equal to the number
2609            of local rows, i.e 'm'.
2610 
2611    Output Parameter:
2612 .  A - the matrix
2613 
2614    Notes:
2615    If the *_nnz parameter is given then the *_nz parameter is ignored
2616 
2617    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2618    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2619    storage requirements for this matrix.
2620 
2621    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2622    processor then it must be used on all processors that share the object for
2623    that argument.
2624 
2625    The user MUST specify either the local or global matrix dimensions
2626    (possibly both).
2627 
2628    The parallel matrix is partitioned such that the first m0 rows belong to
2629    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2630    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2631 
2632    The DIAGONAL portion of the local submatrix of a processor can be defined
2633    as the submatrix which is obtained by extracting the part corresponding
2634    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2635    first row that belongs to the processor, and r2 is the last row belonging
2636    to this processor. This is a square mxm matrix. The remaining portion
2637    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2638 
2639    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2640 
2641    When calling this routine with a single process communicator, a matrix of
2642    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2643    type of communicator, use the construction mechanism:
2644      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2645 
2646    By default, this format uses inodes (identical nodes) when possible.
2647    We search for consecutive rows with the same nonzero structure, thereby
2648    reusing matrix information to achieve increased efficiency.
2649 
2650    Options Database Keys:
2651 +  -mat_no_inode  - Do not use inodes
2652 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2653 -  -mat_aij_oneindex - Internally use indexing starting at 1
2654         rather than 0.  Note that when calling MatSetValues(),
2655         the user still MUST index entries starting at 0!
2656 
2657 
2658    Example usage:
2659 
2660    Consider the following 8x8 matrix with 34 non-zero values, that is
2661    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2662    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2663    as follows:
2664 
2665 .vb
2666             1  2  0  |  0  3  0  |  0  4
2667     Proc0   0  5  6  |  7  0  0  |  8  0
2668             9  0 10  | 11  0  0  | 12  0
2669     -------------------------------------
2670            13  0 14  | 15 16 17  |  0  0
2671     Proc1   0 18  0  | 19 20 21  |  0  0
2672             0  0  0  | 22 23  0  | 24  0
2673     -------------------------------------
2674     Proc2  25 26 27  |  0  0 28  | 29  0
2675            30  0  0  | 31 32 33  |  0 34
2676 .ve
2677 
2678    This can be represented as a collection of submatrices as:
2679 
2680 .vb
2681       A B C
2682       D E F
2683       G H I
2684 .ve
2685 
2686    Where the submatrices A,B,C are owned by proc0, D,E,F are
2687    owned by proc1, G,H,I are owned by proc2.
2688 
2689    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2690    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2691    The 'M','N' parameters are 8,8, and have the same values on all procs.
2692 
2693    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2694    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2695    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2696    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2697    part as SeqAIJ matrices; e.g. proc1 will store [E] as one SeqAIJ
2698    matrix, and [DF] as another SeqAIJ matrix.
2699 
2700    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2701    allocated for every row of the local diagonal submatrix, and o_nz
2702    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2703    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2704    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2705    In this case, the values of d_nz,o_nz are:
2706 .vb
2707      proc0 : dnz = 2, o_nz = 2
2708      proc1 : dnz = 3, o_nz = 2
2709      proc2 : dnz = 1, o_nz = 4
2710 .ve
2711    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2712    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2713    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2714    34 values.
2715 
2716    When the d_nnz, o_nnz parameters are specified, the storage is specified
2717    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2718    In the above case the values for d_nnz,o_nnz are:
2719 .vb
2720      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2721      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2722      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2723 .ve
2724    Here the space allocated is the sum of all the above values, i.e. 34, and
2725    hence the preallocation is perfect.
2726 
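   For the layout above, a minimal sketch of the corresponding call on each process
   (m and n are that process' local sizes from above, and d_nnz,o_nnz its per-row arrays):

.vb
   ierr = MatCreateMPIAIJ(comm,m,n,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
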
2727    Level: intermediate
2728 
2729 .keywords: matrix, aij, compressed row, sparse, parallel
2730 
2731 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2732           MPIAIJ, MatCreateMPIAIJWithArrays()
2733 @*/
2734 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2735 {
2736   PetscErrorCode ierr;
2737   PetscMPIInt    size;
2738 
2739   PetscFunctionBegin;
2740   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2741   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2742   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2743   if (size > 1) {
2744     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2745     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2746   } else {
2747     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2748     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2749   }
2750   PetscFunctionReturn(0);
2751 }
2752 
2753 #undef __FUNCT__
2754 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2755 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2756 {
2757   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2758 
2759   PetscFunctionBegin;
2760   *Ad     = a->A;
2761   *Ao     = a->B;
2762   *colmap = a->garray;
2763   PetscFunctionReturn(0);
2764 }
2765 
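/*
   Usage sketch (illustrative only; A is a placeholder for an MPIAIJ matrix): the routine
   above hands back the two sequential blocks and the local-to-global column map of the
   off-diagonal block; the returned objects are references into A, not copies:

     Mat      Ad,Ao;
     PetscInt *garray;

     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
     /* Ad is the diagonal SeqAIJ block, Ao the off-diagonal block; garray[j] is the
        global column of local column j of Ao */
*/
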
2766 #undef __FUNCT__
2767 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2768 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2769 {
2770   PetscErrorCode ierr;
2771   PetscInt       i;
2772   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2773 
2774   PetscFunctionBegin;
2775   if (coloring->ctype == IS_COLORING_LOCAL) {
2776     ISColoringValue *allcolors,*colors;
2777     ISColoring      ocoloring;
2778 
2779     /* set coloring for diagonal portion */
2780     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2781 
2782     /* set coloring for off-diagonal portion */
2783     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2784     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2785     for (i=0; i<a->B->cmap.n; i++) {
2786       colors[i] = allcolors[a->garray[i]];
2787     }
2788     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2789     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2790     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2791     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2792   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2793     ISColoringValue *colors;
2794     PetscInt        *larray;
2795     ISColoring      ocoloring;
2796 
2797     /* set coloring for diagonal portion */
2798     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2799     for (i=0; i<a->A->cmap.n; i++) {
2800       larray[i] = i + A->cmap.rstart;
2801     }
2802     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2803     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2804     for (i=0; i<a->A->cmap.n; i++) {
2805       colors[i] = coloring->colors[larray[i]];
2806     }
2807     ierr = PetscFree(larray);CHKERRQ(ierr);
2808     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2809     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2810     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2811 
2812     /* set coloring for off-diagonal portion */
2813     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2814     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2815     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2816     for (i=0; i<a->B->cmap.n; i++) {
2817       colors[i] = coloring->colors[larray[i]];
2818     }
2819     ierr = PetscFree(larray);CHKERRQ(ierr);
2820     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2821     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2822     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2823   } else {
2824     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2825   }
2826 
2827   PetscFunctionReturn(0);
2828 }
2829 
2830 #if defined(PETSC_HAVE_ADIC)
2831 #undef __FUNCT__
2832 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2833 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2834 {
2835   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2836   PetscErrorCode ierr;
2837 
2838   PetscFunctionBegin;
2839   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2840   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2841   PetscFunctionReturn(0);
2842 }
2843 #endif
2844 
2845 #undef __FUNCT__
2846 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2847 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2848 {
2849   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2850   PetscErrorCode ierr;
2851 
2852   PetscFunctionBegin;
2853   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2854   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2855   PetscFunctionReturn(0);
2856 }
2857 
2858 #undef __FUNCT__
2859 #define __FUNCT__ "MatMerge"
2860 /*@C
2861       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2862                  matrices from each processor
2863 
2864     Collective on MPI_Comm
2865 
2866    Input Parameters:
2867 +    comm - the communicator the parallel matrix will live on
2868 .    inmat - the input sequential matrix (one per process)
2869 .    n - number of local columns (or PETSC_DECIDE)
2870 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2871 
2872    Output Parameter:
2873 .    outmat - the parallel matrix generated
2874 
2875     Level: advanced
2876 
2877    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2878 
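   A sketch of typical usage (note that, as the code below shows, inmat is
   destroyed by this routine):
.vb
     Mat outmat;
     ierr = MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve
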
2879 @*/
2880 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2881 {
2882   PetscErrorCode ierr;
2883   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2884   PetscInt       *indx;
2885   PetscScalar    *values;
2886 
2887   PetscFunctionBegin;
2888   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2889   if (scall == MAT_INITIAL_MATRIX){
2890     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2891     if (n == PETSC_DECIDE){
2892       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
2893     }
2894     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2895     rstart -= m;
2896 
2897     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2898     for (i=0;i<m;i++) {
2899       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2900       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2901       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2902     }
2903     /* This routine will ONLY return an MPIAIJ type matrix */
2904     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2905     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2906     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2907     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2908     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2909 
2910   } else if (scall == MAT_REUSE_MATRIX){
2911     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2912   } else {
2913     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2914   }
2915 
2916   for (i=0;i<m;i++) {
2917     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2918     I    = i + rstart;
2919     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2920     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2921   }
2922   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2923   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2924   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2925 
2926   PetscFunctionReturn(0);
2927 }
2928 
2929 #undef __FUNCT__
2930 #define __FUNCT__ "MatFileSplit"
2931 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2932 {
2933   PetscErrorCode    ierr;
2934   PetscMPIInt       rank;
2935   PetscInt          m,N,i,rstart,nnz;
2936   size_t            len;
2937   const PetscInt    *indx;
2938   PetscViewer       out;
2939   char              *name;
2940   Mat               B;
2941   const PetscScalar *values;
2942 
2943   PetscFunctionBegin;
2944   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2945   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2946   /* Should this be the type of the diagonal block of A? */
2947   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2948   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
2949   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2950   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2951   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2952   for (i=0;i<m;i++) {
2953     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2954     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2955     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2956   }
2957   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2958   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2959 
2960   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2961   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2962   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2963   sprintf(name,"%s.%d",outfile,rank);
2964   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
2965   ierr = PetscFree(name);CHKERRQ(ierr);
2966   ierr = MatView(B,out);CHKERRQ(ierr);
2967   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2968   ierr = MatDestroy(B);CHKERRQ(ierr);
2969   PetscFunctionReturn(0);
2970 }
2971 
2972 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2973 #undef __FUNCT__
2974 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2975 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2976 {
2977   PetscErrorCode       ierr;
2978   Mat_Merge_SeqsToMPI  *merge;
2979   PetscObjectContainer container;
2980 
2981   PetscFunctionBegin;
2982   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2983   if (container) {
2984     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2985     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2986     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2987     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2988     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2989     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2990     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2991     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2992     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
2993     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
2994     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
2995     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
2996 
2997     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2998     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2999     ierr = PetscFree(merge);CHKERRQ(ierr);
3000   }
3001 
3002   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3003   PetscFunctionReturn(0);
3004 }
3005 
3006 #include "src/mat/utils/freespace.h"
3007 #include "petscbt.h"
3008 static PetscEvent logkey_seqstompinum = 0;
3009 #undef __FUNCT__
3010 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3011 /*@C
3012       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3013                  matrices from each processor
3014 
3015     Collective on MPI_Comm
3016 
3017    Input Parameters:
3018 +    comm - the communicator the parallel matrix will live on
3019 .    seqmat - the input sequential matrix (one per process)
3020 .    m - number of local rows (or PETSC_DECIDE)
3021 .    n - number of local columns (or PETSC_DECIDE)
3022 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3023 
3024    Output Parameter:
3025 .    mpimat - the parallel matrix generated
3026 
3027     Level: advanced
3028 
3029    Notes:
3030      The dimensions of the sequential matrix in each processor MUST be the same.
3031      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
3032      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
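
   A sketch of typical usage (assuming each process holds a SeqAIJ matrix
   seqmat of identical dimensions):
.vb
     Mat C;
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve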
3033 @*/
3034 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3035 {
3036   PetscErrorCode       ierr;
3037   MPI_Comm             comm=mpimat->comm;
3038   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3039   PetscMPIInt          size,rank,taga,*len_s;
3040   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3041   PetscInt             proc,m;
3042   PetscInt             **buf_ri,**buf_rj;
3043   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3044   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3045   MPI_Request          *s_waits,*r_waits;
3046   MPI_Status           *status;
3047   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3048   Mat_Merge_SeqsToMPI  *merge;
3049   PetscObjectContainer container;
3050 
3051   PetscFunctionBegin;
3052   if (!logkey_seqstompinum) {
3053     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
3054   }
3055   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3056 
3057   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3058   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3059 
3060   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3061   if (container) {
3062     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3063   }
3064   bi     = merge->bi;
3065   bj     = merge->bj;
3066   buf_ri = merge->buf_ri;
3067   buf_rj = merge->buf_rj;
3068 
3069   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3070   owners = merge->rowmap.range;
3071   len_s  = merge->len_s;
3072 
3073   /* send and recv matrix values */
3074   /*-----------------------------*/
3075   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3076   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3077 
3078   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3079   for (proc=0,k=0; proc<size; proc++){
3080     if (!len_s[proc]) continue;
3081     i = owners[proc];
3082     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3083     k++;
3084   }
3085 
3086   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3087   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3088   ierr = PetscFree(status);CHKERRQ(ierr);
3089 
3090   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3091   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3092 
3093   /* insert mat values of mpimat */
3094   /*----------------------------*/
3095   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3096   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3097   nextrow = buf_ri_k + merge->nrecv;
3098   nextai  = nextrow + merge->nrecv;
3099 
3100   for (k=0; k<merge->nrecv; k++){
3101     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3102     nrows = *(buf_ri_k[k]);
3103     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
3104     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3105   }
3106 
3107   /* set values of ba */
3108   m = merge->rowmap.n;
3109   for (i=0; i<m; i++) {
3110     arow = owners[rank] + i;
3111     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3112     bnzi = bi[i+1] - bi[i];
3113     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3114 
3115     /* add local non-zero vals of this proc's seqmat into ba */
3116     anzi = ai[arow+1] - ai[arow];
3117     aj   = a->j + ai[arow];
3118     aa   = a->a + ai[arow];
3119     nextaj = 0;
3120     for (j=0; nextaj<anzi; j++){
3121       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3122         ba_i[j] += aa[nextaj++];
3123       }
3124     }
3125 
3126     /* add received vals into ba */
3127     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3128       /* i-th row */
3129       if (i == *nextrow[k]) {
3130         anzi = *(nextai[k]+1) - *nextai[k];
3131         aj   = buf_rj[k] + *(nextai[k]);
3132         aa   = abuf_r[k] + *(nextai[k]);
3133         nextaj = 0;
3134         for (j=0; nextaj<anzi; j++){
3135           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3136             ba_i[j] += aa[nextaj++];
3137           }
3138         }
3139         nextrow[k]++; nextai[k]++;
3140       }
3141     }
3142     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3143   }
3144   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3145   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3146 
3147   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3148   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3149   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3150   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3151   PetscFunctionReturn(0);
3152 }
3153 
3154 static PetscEvent logkey_seqstompisym = 0;
3155 #undef __FUNCT__
3156 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3157 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3158 {
3159   PetscErrorCode       ierr;
3160   Mat                  B_mpi;
3161   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3162   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3163   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3164   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3165   PetscInt             len,proc,*dnz,*onz;
3166   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3167   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3168   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3169   MPI_Status           *status;
3170   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3171   PetscBT              lnkbt;
3172   Mat_Merge_SeqsToMPI  *merge;
3173   PetscObjectContainer container;
3174 
3175   PetscFunctionBegin;
3176   if (!logkey_seqstompisym) {
3177     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3178   }
3179   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3180 
3181   /* make sure it is a PETSc comm */
3182   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3183   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3184   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3185 
3186   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3187   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3188 
3189   /* determine row ownership */
3190   /*---------------------------------------------------------*/
3191   merge->rowmap.n = m;
3192   merge->rowmap.N = M;
3193   merge->rowmap.bs = 1;
3194   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3195   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3196   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3197 
3198   m      = merge->rowmap.n;
3199   M      = merge->rowmap.N;
3200   owners = merge->rowmap.range;
3201 
3202   /* determine the number of messages to send, their lengths */
3203   /*---------------------------------------------------------*/
3204   len_s  = merge->len_s;
3205 
3206   len = 0;  /* length of buf_si[] */
3207   merge->nsend = 0;
3208   for (proc=0; proc<size; proc++){
3209     len_si[proc] = 0;
3210     if (proc == rank){
3211       len_s[proc] = 0;
3212     } else {
3213       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3214       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3215     }
3216     if (len_s[proc]) {
3217       merge->nsend++;
3218       nrows = 0;
3219       for (i=owners[proc]; i<owners[proc+1]; i++){
3220         if (ai[i+1] > ai[i]) nrows++;
3221       }
3222       len_si[proc] = 2*(nrows+1);
3223       len += len_si[proc];
3224     }
3225   }
3226 
3227   /* determine the number and length of messages to receive for ij-structure */
3228   /*-------------------------------------------------------------------------*/
3229   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3230   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3231 
3232   /* post the Irecv of j-structure */
3233   /*-------------------------------*/
3234   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3235   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3236 
3237   /* post the Isend of j-structure */
3238   /*--------------------------------*/
3239   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3240   sj_waits = si_waits + merge->nsend;
3241 
3242   for (proc=0, k=0; proc<size; proc++){
3243     if (!len_s[proc]) continue;
3244     i = owners[proc];
3245     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3246     k++;
3247   }
3248 
3249   /* receives and sends of j-structure are complete */
3250   /*------------------------------------------------*/
3251   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3252   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3253 
3254   /* send and recv i-structure */
3255   /*---------------------------*/
3256   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3257   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3258 
3259   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3260   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3261   for (proc=0,k=0; proc<size; proc++){
3262     if (!len_s[proc]) continue;
3263     /* form outgoing message for i-structure:
3264          buf_si[0]:                 nrows to be sent
3265                [1:nrows]:           row index (global)
3266                [nrows+1:2*nrows+1]: i-structure index
3267     */
3268     /*-------------------------------------------*/
3269     nrows = len_si[proc]/2 - 1;
3270     buf_si_i    = buf_si + nrows+1;
3271     buf_si[0]   = nrows;
3272     buf_si_i[0] = 0;
3273     nrows = 0;
3274     for (i=owners[proc]; i<owners[proc+1]; i++){
3275       anzi = ai[i+1] - ai[i];
3276       if (anzi) {
3277         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3278         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3279         nrows++;
3280       }
3281     }
3282     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3283     k++;
3284     buf_si += len_si[proc];
3285   }
3286 
3287   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3288   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3289 
3290   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3291   for (i=0; i<merge->nrecv; i++){
3292     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3293   }
3294 
3295   ierr = PetscFree(len_si);CHKERRQ(ierr);
3296   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3297   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3298   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3299   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3300   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3301   ierr = PetscFree(status);CHKERRQ(ierr);
3302 
3303   /* compute a local seq matrix in each processor */
3304   /*----------------------------------------------*/
3305   /* allocate bi array and free space for accumulating nonzero column info */
3306   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3307   bi[0] = 0;
3308 
3309   /* create and initialize a linked list */
3310   nlnk = N+1;
3311   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3312 
3313   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3314   len = 0;
3315   len  = ai[owners[rank+1]] - ai[owners[rank]];
3316   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3317   current_space = free_space;
3318 
3319   /* determine symbolic info for each local row */
3320   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3321   nextrow = buf_ri_k + merge->nrecv;
3322   nextai  = nextrow + merge->nrecv;
3323   for (k=0; k<merge->nrecv; k++){
3324     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3325     nrows = *buf_ri_k[k];
3326     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3327     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3328   }
3329 
3330   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3331   len = 0;
3332   for (i=0;i<m;i++) {
3333     bnzi   = 0;
3334     /* add local non-zero cols of this proc's seqmat into lnk */
3335     arow   = owners[rank] + i;
3336     anzi   = ai[arow+1] - ai[arow];
3337     aj     = a->j + ai[arow];
3338     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3339     bnzi += nlnk;
3340     /* add received col data into lnk */
3341     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3342       if (i == *nextrow[k]) { /* i-th row */
3343         anzi = *(nextai[k]+1) - *nextai[k];
3344         aj   = buf_rj[k] + *nextai[k];
3345         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3346         bnzi += nlnk;
3347         nextrow[k]++; nextai[k]++;
3348       }
3349     }
3350     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3351 
3352     /* if free space is not available, make more free space */
3353     if (current_space->local_remaining<bnzi) {
3354       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3355       nspacedouble++;
3356     }
3357     /* copy data into free space, then initialize lnk */
3358     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3359     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3360 
3361     current_space->array           += bnzi;
3362     current_space->local_used      += bnzi;
3363     current_space->local_remaining -= bnzi;
3364 
3365     bi[i+1] = bi[i] + bnzi;
3366   }
3367 
3368   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3369 
3370   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3371   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3372   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3373 
3374   /* create symbolic parallel matrix B_mpi */
3375   /*---------------------------------------*/
3376   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3377   if (n==PETSC_DECIDE) {
3378     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3379   } else {
3380     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3381   }
3382   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3383   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3384   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3385 
3386   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3387   B_mpi->assembled     = PETSC_FALSE;
3388   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3389   merge->bi            = bi;
3390   merge->bj            = bj;
3391   merge->buf_ri        = buf_ri;
3392   merge->buf_rj        = buf_rj;
3393   merge->coi           = PETSC_NULL;
3394   merge->coj           = PETSC_NULL;
3395   merge->owners_co     = PETSC_NULL;
3396 
3397   /* attach the supporting struct to B_mpi for reuse */
3398   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3399   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3400   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3401   *mpimat = B_mpi;
3402 
3403   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3404   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3405   PetscFunctionReturn(0);
3406 }
3407 
3408 static PetscEvent logkey_seqstompi = 0;
3409 #undef __FUNCT__
3410 #define __FUNCT__ "MatMerge_SeqsToMPI"
3411 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3412 {
3413   PetscErrorCode   ierr;
3414 
3415   PetscFunctionBegin;
3416   if (!logkey_seqstompi) {
3417     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3418   }
3419   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3420   if (scall == MAT_INITIAL_MATRIX){
3421     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3422   }
3423   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3424   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3425   PetscFunctionReturn(0);
3426 }
3427 static PetscEvent logkey_getlocalmat = 0;
3428 #undef __FUNCT__
3429 #define __FUNCT__ "MatGetLocalMat"
3430 /*@C
3431      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of a parallel matrix
3432 
3433     Not Collective
3434 
3435    Input Parameters:
3436 +    A - the matrix
3437 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3438 
3439    Output Parameter:
3440 .    A_loc - the local sequential matrix generated
3441 
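   A sketch of typical usage (the MAT_REUSE_MATRIX call updates the numerical
   values in a previously created A_loc):
.vb
     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... change the values of A ... */
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
.ve
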
3442     Level: developer
3443 
3444 @*/
3445 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3446 {
3447   PetscErrorCode  ierr;
3448   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3449   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3450   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3451   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3452   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3453   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3454 
3455   PetscFunctionBegin;
3456   if (!logkey_getlocalmat) {
3457     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3458   }
3459   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3460   if (scall == MAT_INITIAL_MATRIX){
3461     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3462     ci[0] = 0;
3463     for (i=0; i<am; i++){
3464       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3465     }
3466     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3467     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3468     k = 0;
3469     for (i=0; i<am; i++) {
3470       ncols_o = bi[i+1] - bi[i];
3471       ncols_d = ai[i+1] - ai[i];
3472       /* off-diagonal portion of A */
3473       for (jo=0; jo<ncols_o; jo++) {
3474         col = cmap[*bj];
3475         if (col >= cstart) break;
3476         cj[k]   = col; bj++;
3477         ca[k++] = *ba++;
3478       }
3479       /* diagonal portion of A */
3480       for (j=0; j<ncols_d; j++) {
3481         cj[k]   = cstart + *aj++;
3482         ca[k++] = *aa++;
3483       }
3484       /* off-diagonal portion of A */
3485       for (j=jo; j<ncols_o; j++) {
3486         cj[k]   = cmap[*bj++];
3487         ca[k++] = *ba++;
3488       }
3489     }
3490     /* put together the new matrix */
3491     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3492     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3493     /* Since these are PETSc arrays, change flags to free them as necessary. */
3494     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3495     mat->freedata = PETSC_TRUE;
3496     mat->nonew    = 0;
3497   } else if (scall == MAT_REUSE_MATRIX){
3498     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3499     ci = mat->i; cj = mat->j; ca = mat->a;
3500     for (i=0; i<am; i++) {
3501       /* off-diagonal portion of A */
3502       ncols_o = bi[i+1] - bi[i];
3503       for (jo=0; jo<ncols_o; jo++) {
3504         col = cmap[*bj];
3505         if (col >= cstart) break;
3506         *ca++ = *ba++; bj++;
3507       }
3508       /* diagonal portion of A */
3509       ncols_d = ai[i+1] - ai[i];
3510       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3511       /* off-diagonal portion of A */
3512       for (j=jo; j<ncols_o; j++) {
3513         *ca++ = *ba++; bj++;
3514       }
3515     }
3516   } else {
3517     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3518   }
3519 
3520   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3521   PetscFunctionReturn(0);
3522 }
3523 
3524 static PetscEvent logkey_getlocalmatcondensed = 0;
3525 #undef __FUNCT__
3526 #define __FUNCT__ "MatGetLocalMatCondensed"
3527 /*@C
3528      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
3529 
3530     Not Collective
3531 
3532    Input Parameters:
3533 +    A - the matrix
3534 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3535 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3536 
3537    Output Parameter:
3538 .    A_loc - the local sequential matrix generated
3539 
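   A sketch of typical usage (passing PETSC_NULL for row and col selects all
   local rows and the nonzero columns, respectively):
.vb
     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
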
3540     Level: developer
3541 
3542 @*/
3543 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3544 {
3545   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3546   PetscErrorCode    ierr;
3547   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3548   IS                isrowa,iscola;
3549   Mat               *aloc;
3550 
3551   PetscFunctionBegin;
3552   if (!logkey_getlocalmatcondensed) {
3553     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3554   }
3555   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3556   if (!row){
3557     start = A->rmap.rstart; end = A->rmap.rend;
3558     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3559   } else {
3560     isrowa = *row;
3561   }
3562   if (!col){
3563     start = A->cmap.rstart;
3564     cmap  = a->garray;
3565     nzA   = a->A->cmap.n;
3566     nzB   = a->B->cmap.n;
3567     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3568     ncols = 0;
3569     for (i=0; i<nzB; i++) {
3570       if (cmap[i] < start) idx[ncols++] = cmap[i];
3571       else break;
3572     }
3573     imark = i;
3574     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3575     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3576     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3577     ierr = PetscFree(idx);CHKERRQ(ierr);
3578   } else {
3579     iscola = *col;
3580   }
3581   if (scall != MAT_INITIAL_MATRIX){
3582     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3583     aloc[0] = *A_loc;
3584   }
3585   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3586   *A_loc = aloc[0];
3587   ierr = PetscFree(aloc);CHKERRQ(ierr);
3588   if (!row){
3589     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3590   }
3591   if (!col){
3592     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3593   }
3594   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3595   PetscFunctionReturn(0);
3596 }
3597 
3598 static PetscEvent logkey_GetBrowsOfAcols = 0;
3599 #undef __FUNCT__
3600 #define __FUNCT__ "MatGetBrowsOfAcols"
3601 /*@C
3602     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
3603 
3604     Collective on Mat
3605 
3606    Input Parameters:
3607 +    A,B - the matrices in mpiaij format
3608 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3609 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3610 
3611    Output Parameter:
3612 +    rowb, colb - index sets of rows and columns of B to extract
3613 .    brstart - row index of B_seq from which the next B->rmap.n rows are taken from B's local rows
3614 -    B_seq - the sequential matrix generated
3615 
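   A sketch of typical usage (the index sets and B_seq filled in by the
   MAT_INITIAL_MATRIX call can be passed back in for MAT_REUSE_MATRIX):
.vb
     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve
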
3616     Level: developer
3617 
3618 @*/
3619 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3620 {
3621   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3622   PetscErrorCode    ierr;
3623   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3624   IS                isrowb,iscolb;
3625   Mat               *bseq;
3626 
3627   PetscFunctionBegin;
3628   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3629     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3630   }
3631   if (!logkey_GetBrowsOfAcols) {
3632     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3633   }
3634   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3635 
3636   if (scall == MAT_INITIAL_MATRIX){
3637     start = A->cmap.rstart;
3638     cmap  = a->garray;
3639     nzA   = a->A->cmap.n;
3640     nzB   = a->B->cmap.n;
3641     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3642     ncols = 0;
3643     for (i=0; i<nzB; i++) {  /* row < local row index */
3644       if (cmap[i] < start) idx[ncols++] = cmap[i];
3645       else break;
3646     }
3647     imark = i;
3648     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3649     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3650     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3651     ierr = PetscFree(idx);CHKERRQ(ierr);
3652     *brstart = imark;
3653     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
3654   } else {
3655     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3656     isrowb = *rowb; iscolb = *colb;
3657     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3658     bseq[0] = *B_seq;
3659   }
3660   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3661   *B_seq = bseq[0];
3662   ierr = PetscFree(bseq);CHKERRQ(ierr);
3663   if (!rowb){
3664     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3665   } else {
3666     *rowb = isrowb;
3667   }
3668   if (!colb){
3669     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3670   } else {
3671     *colb = iscolb;
3672   }
3673   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3674   PetscFunctionReturn(0);
3675 }
3676 
3677 static PetscEvent logkey_GetBrowsOfAocols = 0;
3678 #undef __FUNCT__
3679 #define __FUNCT__ "MatGetBrowsOfAoCols"
3680 /*@C
3681     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3682     of the OFF-DIAGONAL portion of local A
3683 
3684     Collective on Mat
3685 
3686    Input Parameters:
3687 +    A,B - the matrices in mpiaij format
3688 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3689 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3690 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3691 
3692    Output Parameter:
3693 .    B_oth - the sequential matrix generated
3694 
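   A sketch of typical usage (the startsj and bufa_ptr arrays returned by the
   MAT_INITIAL_MATRIX call may be passed back in for MAT_REUSE_MATRIX):
.vb
     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     Mat         B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
.ve
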
3695     Level: developer
3696 
3697 @*/
3698 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3699 {
3700   VecScatter_MPI_General *gen_to,*gen_from;
3701   PetscErrorCode         ierr;
3702   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
3703   Mat_SeqAIJ             *b_oth;
3704   VecScatter             ctx=a->Mvctx;
3705   MPI_Comm               comm=ctx->comm;
3706   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3707   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
3708   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3709   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3710   MPI_Request            *rwaits,*swaits;
3711   MPI_Status             *sstatus,rstatus;
3712   PetscInt               *cols;
3713   PetscScalar            *vals;
3714   PetscMPIInt            j;
3715 
3716   PetscFunctionBegin;
3717   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3718     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3719   }
3720   if (!logkey_GetBrowsOfAocols) {
3721     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3722   }
3723   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3724   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3725 
3726   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3727   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3728   rvalues  = gen_from->values; /* used here to hold the lengths of the rows to be received */
3729   svalues  = gen_to->values;   /* used here to hold the lengths of the rows to be sent */
3730   nrecvs   = gen_from->n;
3731   nsends   = gen_to->n;
3732 
3733   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
3734   srow     = gen_to->indices;   /* local row index to be sent */
3735   rstarts  = gen_from->starts;
3736   sstarts  = gen_to->starts;
3737   rprocs   = gen_from->procs;
3738   sprocs   = gen_to->procs;
3739   sstatus  = gen_to->sstatus;
3740 
3741   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3742   if (scall == MAT_INITIAL_MATRIX){
3743     /* i-array */
3744     /*---------*/
3745     /*  post receives */
3746     for (i=0; i<nrecvs; i++){
3747       rowlen = (PetscInt*)rvalues + rstarts[i];
3748       nrows = rstarts[i+1]-rstarts[i];
3749       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3750     }
3751 
3752     /* pack the outgoing message */
3753     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3754     rstartsj = sstartsj + nsends +1;
3755     sstartsj[0] = 0;  rstartsj[0] = 0;
3756     len = 0; /* total length of j or a array to be sent */
3757     k = 0;
3758     for (i=0; i<nsends; i++){
3759       rowlen = (PetscInt*)svalues + sstarts[i];
3760       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3761       for (j=0; j<nrows; j++) {
3762         row = srow[k] + B->rmap.range[rank]; /* global row idx */
3763         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3764         len += rowlen[j];
3765         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3766         k++;
3767       }
3768       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3769        sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3770     }
3771     /* recvs and sends of i-array are completed */
3772     i = nrecvs;
3773     while (i--) {
3774       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3775     }
3776     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3777     /* allocate buffers for sending j and a arrays */
3778     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3779     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3780 
3781     /* create i-array of B_oth */
3782     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3783     b_othi[0] = 0;
3784     len = 0; /* total length of j or a array to be received */
3785     k = 0;
3786     for (i=0; i<nrecvs; i++){
3787       rowlen = (PetscInt*)rvalues + rstarts[i];
3788       nrows = rstarts[i+1]-rstarts[i];
3789       for (j=0; j<nrows; j++) {
3790         b_othi[k+1] = b_othi[k] + rowlen[j];
3791         len += rowlen[j]; k++;
3792       }
3793       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3794     }
3795 
3796     /* allocate space for j and a arrays of B_oth */
3797     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3798     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3799 
3800     /* j-array */
3801     /*---------*/
3802     /*  post receives of j-array */
3803     for (i=0; i<nrecvs; i++){
3804       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3805       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3806     }
3807     k = 0;
3808     for (i=0; i<nsends; i++){
3809       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3810       bufJ = bufj+sstartsj[i];
3811       for (j=0; j<nrows; j++) {
3812         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3813         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3814         for (l=0; l<ncols; l++){
3815           *bufJ++ = cols[l];
3816         }
3817         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3818       }
3819       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3820     }
3821 
3822     /* recvs and sends of j-array are completed */
3823     i = nrecvs;
3824     while (i--) {
3825       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3826     }
3827     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3828   } else if (scall == MAT_REUSE_MATRIX){
3829     sstartsj = *startsj;
3830     rstartsj = sstartsj + nsends +1;
3831     bufa     = *bufa_ptr;
3832     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3833     b_otha   = b_oth->a;
3834   } else {
3835     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3836   }
3837 
3838   /* a-array */
3839   /*---------*/
3840   /*  post receives of a-array */
3841   for (i=0; i<nrecvs; i++){
3842     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3843     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3844   }
3845   k = 0;
3846   for (i=0; i<nsends; i++){
3847     nrows = sstarts[i+1]-sstarts[i];
3848     bufA = bufa+sstartsj[i];
3849     for (j=0; j<nrows; j++) {
3850       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3851       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3852       for (l=0; l<ncols; l++){
3853         *bufA++ = vals[l];
3854       }
3855       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3856 
3857     }
3858     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3859   }
3860   /* recvs and sends of a-array are completed */
3861   i = nrecvs;
3862   while (i--) {
3863     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3864   }
3865   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3866   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
3867 
3868   if (scall == MAT_INITIAL_MATRIX){
3869     /* put together the new matrix */
3870     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3871 
3872     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3873     /* Since these are PETSc arrays, change flags to free them as necessary. */
3874     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3875     b_oth->freedata = PETSC_TRUE;
3876     b_oth->nonew    = 0;
3877 
3878     ierr = PetscFree(bufj);CHKERRQ(ierr);
3879     if (!startsj || !bufa_ptr){
3880       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3881       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
3882     } else {
3883       *startsj  = sstartsj;
3884       *bufa_ptr = bufa;
3885     }
3886   }
3887   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3888 
3889   PetscFunctionReturn(0);
3890 }
3891 
3892 #undef __FUNCT__
3893 #define __FUNCT__ "MatGetCommunicationStructs"
3894 /*@C
3895   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
3896 
3897   Not Collective
3898 
3899   Input Parameters:
3900 . A - The matrix in mpiaij format
3901 
3902   Output Parameter:
3903 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
3904 . colmap - A map from global column index to local index into lvec
3905 - multScatter - A scatter from the argument of a matrix-vector product to lvec
3906 
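  A sketch of typical usage (the type of colmap depends on PETSC_USE_CTABLE,
  as in the prototypes below):
.vb
     Vec        lvec;
     VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
#else
     PetscInt   *colmap;
#endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
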
3907   Level: developer
3908 
3909 @*/
3910 #if defined (PETSC_USE_CTABLE)
3911 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
3912 #else
3913 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
3914 #endif
3915 {
3916   Mat_MPIAIJ *a;
3917 
3918   PetscFunctionBegin;
3919   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
3920   PetscValidPointer(lvec, 2)
3921   PetscValidPointer(colmap, 3)
3922   PetscValidPointer(multScatter, 4)
3923   a = (Mat_MPIAIJ *) A->data;
3924   if (lvec) *lvec = a->lvec;
3925   if (colmap) *colmap = a->colmap;
3926   if (multScatter) *multScatter = a->Mvctx;
3927   PetscFunctionReturn(0);
3928 }
3929 
3930 /*MC
3931    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3932 
3933    Options Database Keys:
3934 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3935 
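   A sketch of creating an MPIAIJ matrix directly (rather than through the
   options database):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
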
3936   Level: beginner
3937 
3938 .seealso: MatCreateMPIAIJ
3939 M*/
3940 
3941 EXTERN_C_BEGIN
3942 #undef __FUNCT__
3943 #define __FUNCT__ "MatCreate_MPIAIJ"
3944 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3945 {
3946   Mat_MPIAIJ     *b;
3947   PetscErrorCode ierr;
3948   PetscMPIInt    size;
3949 
3950   PetscFunctionBegin;
3951   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3952 
3953   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3954   B->data         = (void*)b;
3955   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3956   B->factor       = 0;
3957   B->rmap.bs      = 1;
3958   B->assembled    = PETSC_FALSE;
3959   B->mapping      = 0;
3960 
3961   B->insertmode      = NOT_SET_VALUES;
3962   b->size            = size;
3963   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3964 
3965   /* build cache for off array entries formed */
3966   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3967   b->donotstash  = PETSC_FALSE;
3968   b->colmap      = 0;
3969   b->garray      = 0;
3970   b->roworiented = PETSC_TRUE;
3971 
3972   /* stuff used for matrix vector multiply */
3973   b->lvec      = PETSC_NULL;
3974   b->Mvctx     = PETSC_NULL;
3975 
3976   /* stuff for MatGetRow() */
3977   b->rowindices   = 0;
3978   b->rowvalues    = 0;
3979   b->getrowactive = PETSC_FALSE;
3980 
3981 
3982   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3983                                      "MatStoreValues_MPIAIJ",
3984                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3985   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3986                                      "MatRetrieveValues_MPIAIJ",
3987                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3988   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3989 				     "MatGetDiagonalBlock_MPIAIJ",
3990                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3991   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3992 				     "MatIsTranspose_MPIAIJ",
3993 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3994   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3995 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3996 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3997   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3998 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3999 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4000   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4001 				     "MatDiagonalScaleLocal_MPIAIJ",
4002 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4003   PetscFunctionReturn(0);
4004 }
4005 EXTERN_C_END
4006 
4007 /*
4008     Special version for direct calls from Fortran
4009 */
4010 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4011 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4012 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4013 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4014 #endif
4015 
4016 /* Change these macros so they can be used in a void function */
4017 #undef CHKERRQ
4018 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4019 #undef SETERRQ2
4020 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4021 #undef SETERRQ
4022 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4023 
4024 EXTERN_C_BEGIN
4025 #undef __FUNCT__
4026 #define __FUNCT__ "matsetvaluesmpiaij_"
4027 void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv)
4028 {
4029   Mat            mat = *mmat;
4030   PetscInt       m = *mm, n = *mn;
4031   InsertMode     addv = *maddv;
4032   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4033   PetscScalar    value;
4034   PetscErrorCode ierr;
4035 
4036   MatPreallocated(mat);
4037   if (mat->insertmode == NOT_SET_VALUES) {
4038     mat->insertmode = addv;
4039   }
4040 #if defined(PETSC_USE_DEBUG)
4041   else if (mat->insertmode != addv) {
4042     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4043   }
4044 #endif
4045   {
4046   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4047   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4048   PetscTruth     roworiented = aij->roworiented;
4049 
4050   /* Some Variables required in the macro */
4051   Mat            A = aij->A;
4052   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4053   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4054   PetscScalar    *aa = a->a;
4055   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4056   Mat            B = aij->B;
4057   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4058   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4059   PetscScalar    *ba = b->a;
4060 
4061   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4062   PetscInt       nonew = a->nonew;
4063   PetscScalar    *ap1,*ap2;
4064 
4065   PetscFunctionBegin;
4066   for (i=0; i<m; i++) {
4067     if (im[i] < 0) continue;
4068 #if defined(PETSC_USE_DEBUG)
4069     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4070 #endif
4071     if (im[i] >= rstart && im[i] < rend) {
4072       row      = im[i] - rstart;
4073       lastcol1 = -1;
4074       rp1      = aj + ai[row];
4075       ap1      = aa + ai[row];
4076       rmax1    = aimax[row];
4077       nrow1    = ailen[row];
4078       low1     = 0;
4079       high1    = nrow1;
4080       lastcol2 = -1;
4081       rp2      = bj + bi[row];
4082       ap2      = ba + bi[row];
4083       rmax2    = bimax[row];
4084       nrow2    = bilen[row];
4085       low2     = 0;
4086       high2    = nrow2;
4087 
4088       for (j=0; j<n; j++) {
4089         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4090         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4091         if (in[j] >= cstart && in[j] < cend){
4092           col = in[j] - cstart;
4093           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4094         } else if (in[j] < 0) continue;
4095 #if defined(PETSC_USE_DEBUG)
4096         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4097 #endif
4098         else {
4099           if (mat->was_assembled) {
4100             if (!aij->colmap) {
4101               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4102             }
4103 #if defined (PETSC_USE_CTABLE)
4104             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4105 	    col--;
4106 #else
4107             col = aij->colmap[in[j]] - 1;
4108 #endif
4109             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4110               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4111               col =  in[j];
4112               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4113               B = aij->B;
4114               b = (Mat_SeqAIJ*)B->data;
4115               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4116               rp2      = bj + bi[row];
4117               ap2      = ba + bi[row];
4118               rmax2    = bimax[row];
4119               nrow2    = bilen[row];
4120               low2     = 0;
4121               high2    = nrow2;
4122               bm       = aij->B->rmap.n;
4123               ba = b->a;
4124             }
4125           } else col = in[j];
4126           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4127         }
4128       }
4129     } else {
4130       if (!aij->donotstash) {
4131         if (roworiented) {
4132           if (ignorezeroentries && v[i*n] == 0.0) continue;
4133           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4134         } else {
4135           if (ignorezeroentries && v[i] == 0.0) continue;
4136           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4137         }
4138       }
4139     }
4140   }}
4141   PetscFunctionReturnVoid();
4142 }
4143 EXTERN_C_END
4144