xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a8a8f36618f536eff5e4c1df78b3fdf9dd5d254e)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
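/*
   A minimal usage sketch (the communicator comm, the sizes m,n,M,N, and the fill estimates
   d_nz,o_nz are illustrative placeholders):

     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,d_nz,PETSC_NULL);
     MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);

   Calling both preallocation routines lets the same code run on one process and on many.
*/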
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
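/*
   A minimal sketch of selecting this format at runtime (assumes the application calls
   MatSetFromOptions() before assembly and is run with -mat_type aijcrl):

     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetFromOptions(A);

   Alternatively, MatSetType(A,MATAIJCRL) selects it directly in the code.
*/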
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(); horrible lack of reuse. This should be a routine for each matrix type.
177 
178     Only for square matrices.
179 */
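/*
   A minimal usage sketch (assumptions: gmat is a square MATSEQAIJ matrix held on process 0,
   mloc is the number of rows each process is to own, and dmat is the distributed result):

     MatDistribute_MPIAIJ(comm,gmat,mloc,MAT_INITIAL_MATRIX,&dmat);
     ...
     MatDistribute_MPIAIJ(comm,gmat,mloc,MAT_REUSE_MATRIX,&dmat);

   The second call reuses the already-distributed nonzero structure and only moves the
   numerical values from process 0.
*/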
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
204     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
205     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
206     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
207     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
208     rowners[0] = 0;
209     for (i=2; i<=size; i++) {
210       rowners[i] += rowners[i-1];
211     }
212     rstart = rowners[rank];
213     rend   = rowners[rank+1];
214     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
215     if (!rank) {
216       gmata = (Mat_SeqAIJ*) gmat->data;
217       /* send row lengths to all processors */
218       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
219       for (i=1; i<size; i++) {
220 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222       /* determine the number of diagonal and off-diagonal entries in each row */
223       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
224       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
225       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
226       jj = 0;
227       for (i=0; i<m; i++) {
228 	for (j=0; j<dlens[i]; j++) {
229           if (gmata->j[jj] < rstart) ld[i]++;
230 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
231 	  jj++;
232 	}
233       }
234       /* send column indices to other processes */
235       for (i=1; i<size; i++) {
236 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
237 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
238 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
239       }
240 
241       /* send numerical values to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
245       }
246       gmataa = gmata->a;
247       gmataj = gmata->j;
248 
249     } else {
250       /* receive row lengths */
251       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
252       /* receive column indices */
253       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
254       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
255       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       /* determine the number of diagonal and off-diagonal entries in each row */
257       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
258       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
259       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
260       jj = 0;
261       for (i=0; i<m; i++) {
262 	for (j=0; j<dlens[i]; j++) {
263           if (gmataj[jj] < rstart) ld[i]++;
264 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
265 	  jj++;
266 	}
267       }
268       /* receive numerical values */
269       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
270       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
271     }
272     /* set preallocation */
273     for (i=0; i<m; i++) {
274       dlens[i] -= olens[i];
275     }
276     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
277     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
278 
279     for (i=0; i<m; i++) {
280       dlens[i] += olens[i];
281     }
282     cnt  = 0;
283     for (i=0; i<m; i++) {
284       row  = rstart + i;
285       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
286       cnt += dlens[i];
287     }
288     if (rank) {
289       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
290     }
291     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
292     ierr = PetscFree(rowners);CHKERRQ(ierr);
293     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
294     *inmat = mat;
295   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
296     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
297     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
298     mat   = *inmat;
299     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
300     if (!rank) {
301       /* send numerical values to other processes */
302       gmata = (Mat_SeqAIJ*) gmat->data;
303       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
304       gmataa = gmata->a;
305       for (i=1; i<size; i++) {
306         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
307         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
308       }
309       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
310     } else {
311       /* receive numerical values from process 0 */
312       nz   = Ad->nz + Ao->nz;
313       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
314       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
315     }
316     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
317     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
318     ad = Ad->a;
319     ao = Ao->a;
320     if (mat->rmap->n) {
321       i  = 0;
322       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
323       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
324     }
325     for (i=1; i<mat->rmap->n; i++) {
326       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
327       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
328     }
329     i--;
330     if (mat->rmap->n) {
331       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332     }
333     if (rank) {
334       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
335     }
336   }
337   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
338   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
339   CHKMEMQ;
340   PetscFunctionReturn(0);
341 }
342 
343 /*
344   Local utility routine that creates a mapping from the global column
345 number to the local number in the off-diagonal part of the local
346 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
347 a slightly higher hash table cost; without it, it is not scalable (each process
348 holds an order-N integer array) but access is fast.
349 */
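/*
   For reference, a lookup of a global column gcol through the colmap (mirroring its use in
   MatSetValues_MPIAIJ(); col ends up negative when gcol is not present):

   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&col); col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif
*/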
350 #undef __FUNCT__
351 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
352 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
353 {
354   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
355   PetscErrorCode ierr;
356   PetscInt       n = aij->B->cmap->n,i;
357 
358   PetscFunctionBegin;
359   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
360 #if defined (PETSC_USE_CTABLE)
361   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
362   for (i=0; i<n; i++){
363     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
364   }
365 #else
366   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
367   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
368   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
369   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
370 #endif
371   PetscFunctionReturn(0);
372 }
373 
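/*
   The two macros below insert (or add to) a single value in the local diagonal block A
   (suffix-1 variables) or the off-diagonal block B (suffix-2 variables): a short binary
   search narrows the range, a linear scan locates the column, and when the column is not
   present a new entry is shifted into place (or the insertion is skipped or rejected,
   depending on the nonew setting).
*/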
374 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
375 { \
376     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
377     lastcol1 = col;\
378     while (high1-low1 > 5) { \
379       t = (low1+high1)/2; \
380       if (rp1[t] > col) high1 = t; \
381       else             low1  = t; \
382     } \
383       for (_i=low1; _i<high1; _i++) { \
384         if (rp1[_i] > col) break; \
385         if (rp1[_i] == col) { \
386           if (addv == ADD_VALUES) ap1[_i] += value;   \
387           else                    ap1[_i] = value; \
388           goto a_noinsert; \
389         } \
390       }  \
391       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
392       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
393       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
394       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
395       N = nrow1++ - 1; a->nz++; high1++; \
396       /* shift up all the later entries in this row */ \
397       for (ii=N; ii>=_i; ii--) { \
398         rp1[ii+1] = rp1[ii]; \
399         ap1[ii+1] = ap1[ii]; \
400       } \
401       rp1[_i] = col;  \
402       ap1[_i] = value;  \
403       a_noinsert: ; \
404       ailen[row] = nrow1; \
405 }
406 
407 
408 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
409 { \
410     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
411     lastcol2 = col;\
412     while (high2-low2 > 5) { \
413       t = (low2+high2)/2; \
414       if (rp2[t] > col) high2 = t; \
415       else             low2  = t; \
416     } \
417     for (_i=low2; _i<high2; _i++) {		\
418       if (rp2[_i] > col) break;			\
419       if (rp2[_i] == col) {			      \
420 	if (addv == ADD_VALUES) ap2[_i] += value;     \
421 	else                    ap2[_i] = value;      \
422 	goto b_noinsert;			      \
423       }						      \
424     }							      \
425     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
426     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
427     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
428     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
429     N = nrow2++ - 1; b->nz++; high2++;					\
430     /* shift up all the later entries in this row */			\
431     for (ii=N; ii>=_i; ii--) {						\
432       rp2[ii+1] = rp2[ii];						\
433       ap2[ii+1] = ap2[ii];						\
434     }									\
435     rp2[_i] = col;							\
436     ap2[_i] = value;							\
437     b_noinsert: ;								\
438     bilen[row] = nrow2;							\
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
443 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
444 {
445   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
446   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
447   PetscErrorCode ierr;
448   PetscInt       l,*garray = mat->garray,diag;
449 
450   PetscFunctionBegin;
451   /* code only works for square matrices A */
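  /* The input array v holds the full local row in global column order, so it splits as
     [ off-diagonal entries left of the diagonal block | diagonal block | off-diagonal
     entries right of the diagonal block ] and is copied into b->a and a->a accordingly */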
452 
453   /* find size of row to the left of the diagonal part */
454   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
455   row  = row - diag;
456   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
457     if (garray[b->j[b->i[row]+l]] > diag) break;
458   }
459   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
460 
461   /* diagonal part */
462   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
463 
464   /* right of diagonal part */
465   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
466   PetscFunctionReturn(0);
467 }
468 
469 #undef __FUNCT__
470 #define __FUNCT__ "MatSetValues_MPIAIJ"
471 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
472 {
473   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
474   PetscScalar    value;
475   PetscErrorCode ierr;
476   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
477   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
478   PetscBool      roworiented = aij->roworiented;
479 
480   /* Some Variables required in the macro */
481   Mat            A = aij->A;
482   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
483   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
484   MatScalar      *aa = a->a;
485   PetscBool      ignorezeroentries = a->ignorezeroentries;
486   Mat            B = aij->B;
487   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
488   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
489   MatScalar      *ba = b->a;
490 
491   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
492   PetscInt       nonew;
493   MatScalar      *ap1,*ap2;
494 
495   PetscFunctionBegin;
496   if (v) PetscValidScalarPointer(v,6);
497   for (i=0; i<m; i++) {
498     if (im[i] < 0) continue;
499 #if defined(PETSC_USE_DEBUG)
500     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
501 #endif
502     if (im[i] >= rstart && im[i] < rend) {
503       row      = im[i] - rstart;
504       lastcol1 = -1;
505       rp1      = aj + ai[row];
506       ap1      = aa + ai[row];
507       rmax1    = aimax[row];
508       nrow1    = ailen[row];
509       low1     = 0;
510       high1    = nrow1;
511       lastcol2 = -1;
512       rp2      = bj + bi[row];
513       ap2      = ba + bi[row];
514       rmax2    = bimax[row];
515       nrow2    = bilen[row];
516       low2     = 0;
517       high2    = nrow2;
518 
519       for (j=0; j<n; j++) {
520         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
521         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
522         if (in[j] >= cstart && in[j] < cend){
523           col = in[j] - cstart;
524           nonew = a->nonew;
525           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
526         } else if (in[j] < 0) continue;
527 #if defined(PETSC_USE_DEBUG)
528         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
529 #endif
530         else {
531           if (mat->was_assembled) {
532             if (!aij->colmap) {
533               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
534             }
535 #if defined (PETSC_USE_CTABLE)
536             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
537 	    col--;
538 #else
539             col = aij->colmap[in[j]] - 1;
540 #endif
541             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
542               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
543               col =  in[j];
544               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
545               B = aij->B;
546               b = (Mat_SeqAIJ*)B->data;
547               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
548               rp2      = bj + bi[row];
549               ap2      = ba + bi[row];
550               rmax2    = bimax[row];
551               nrow2    = bilen[row];
552               low2     = 0;
553               high2    = nrow2;
554               bm       = aij->B->rmap->n;
555               ba = b->a;
556             }
557           } else col = in[j];
558           nonew = b->nonew;
559           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
560         }
561       }
562     } else {
563       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
564       if (!aij->donotstash) {
565         mat->assembled = PETSC_FALSE;
566         if (roworiented) {
567           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
568         } else {
569           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
570         }
571       }
572     }
573   }
574   PetscFunctionReturn(0);
575 }
576 
577 #undef __FUNCT__
578 #define __FUNCT__ "MatGetValues_MPIAIJ"
579 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
580 {
581   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
582   PetscErrorCode ierr;
583   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
584   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
585 
586   PetscFunctionBegin;
587   for (i=0; i<m; i++) {
588     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
589     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
590     if (idxm[i] >= rstart && idxm[i] < rend) {
591       row = idxm[i] - rstart;
592       for (j=0; j<n; j++) {
593         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
594         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
595         if (idxn[j] >= cstart && idxn[j] < cend){
596           col = idxn[j] - cstart;
597           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
598         } else {
599           if (!aij->colmap) {
600             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
601           }
602 #if defined (PETSC_USE_CTABLE)
603           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
604           col --;
605 #else
606           col = aij->colmap[idxn[j]] - 1;
607 #endif
608           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
609           else {
610             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611           }
612         }
613       }
614     } else {
615       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
616     }
617   }
618   PetscFunctionReturn(0);
619 }
620 
621 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
622 
623 #undef __FUNCT__
624 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
625 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
626 {
627   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
628   PetscErrorCode ierr;
629   PetscInt       nstash,reallocs;
630   InsertMode     addv;
631 
632   PetscFunctionBegin;
633   if (aij->donotstash || mat->nooffprocentries) {
634     PetscFunctionReturn(0);
635   }
636 
637   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
638   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
639   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
640   mat->insertmode = addv; /* in case this processor had no cache */
641 
642   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
643   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
644   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
645   PetscFunctionReturn(0);
646 }
647 
648 #undef __FUNCT__
649 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
650 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
651 {
652   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
653   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
654   PetscErrorCode ierr;
655   PetscMPIInt    n;
656   PetscInt       i,j,rstart,ncols,flg;
657   PetscInt       *row,*col;
658   PetscBool      other_disassembled;
659   PetscScalar    *val;
660   InsertMode     addv = mat->insertmode;
661 
662   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
663   PetscFunctionBegin;
664   if (!aij->donotstash && !mat->nooffprocentries) {
665     while (1) {
666       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
667       if (!flg) break;
668 
669       for (i=0; i<n;) {
670         /* Now identify the consecutive vals belonging to the same row */
671         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
672         if (j < n) ncols = j-i;
673         else       ncols = n-i;
674         /* Now assemble all these values with a single function call */
675         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
676         i = j;
677       }
678     }
679     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
680   }
681   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
682   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
683 
684   /* determine if any processor has disassembled; if so we must
685      also disassemble ourselves so that we may reassemble. */
686   /*
687      if the nonzero structure of submatrix B cannot change then we know that
688      no processor disassembled, thus we can skip this stuff
689   */
690   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
691     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
692     if (mat->was_assembled && !other_disassembled) {
693       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
694     }
695   }
696   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
697     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
698   }
699   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
700   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
701   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
702   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
703 
704   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
705   aij->rowvalues = 0;
706 
707   /* used by MatAXPY() */
708   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
709   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
710 
711   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
712   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
718 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
719 {
720   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
721   PetscErrorCode ierr;
722 
723   PetscFunctionBegin;
724   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
725   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatZeroRows_MPIAIJ"
731 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
732 {
733   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
734   PetscErrorCode    ierr;
735   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
736   PetscInt          i,*owners = A->rmap->range;
737   PetscInt          *nprocs,j,idx,nsends,row;
738   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
739   PetscInt          *rvalues,count,base,slen,*source;
740   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
741   MPI_Comm          comm = ((PetscObject)A)->comm;
742   MPI_Request       *send_waits,*recv_waits;
743   MPI_Status        recv_status,*send_status;
744   const PetscScalar *xx;
745   PetscScalar       *bb;
746 #if defined(PETSC_DEBUG)
747   PetscBool      found = PETSC_FALSE;
748 #endif
749 
750   PetscFunctionBegin;
751   /*  first count number of contributors to each processor */
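  /* nprocs[2*j] counts the rows destined for process j, nprocs[2*j+1] is set to 1 when at
     least one row goes there, and owner[i] records the owning process of rows[i] */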
752   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
753   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
754   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
755   j = 0;
756   for (i=0; i<N; i++) {
757     if (lastidx > (idx = rows[i])) j = 0;
758     lastidx = idx;
759     for (; j<size; j++) {
760       if (idx >= owners[j] && idx < owners[j+1]) {
761         nprocs[2*j]++;
762         nprocs[2*j+1] = 1;
763         owner[i] = j;
764 #if defined(PETSC_DEBUG)
765         found = PETSC_TRUE;
766 #endif
767         break;
768       }
769     }
770 #if defined(PETSC_DEBUG)
771     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
772     found = PETSC_FALSE;
773 #endif
774   }
775   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
776 
777   if (A->nooffproczerorows) {
778     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
779     nrecvs = nsends;
780     nmax   = N;
781   } else {
782     /* inform other processors of number of messages and max length*/
783     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
784   }
785 
786   /* post receives:   */
787   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
788   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
789   for (i=0; i<nrecvs; i++) {
790     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
791   }
792 
793   /* do sends:
794       1) starts[i] gives the starting index in svalues for stuff going to
795          the ith processor
796   */
797   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
798   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
799   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
800   starts[0] = 0;
801   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
802   for (i=0; i<N; i++) {
803     svalues[starts[owner[i]]++] = rows[i];
804   }
805 
806   starts[0] = 0;
807   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
808   count = 0;
809   for (i=0; i<size; i++) {
810     if (nprocs[2*i+1]) {
811       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
812     }
813   }
814   ierr = PetscFree(starts);CHKERRQ(ierr);
815 
816   base = owners[rank];
817 
818   /*  wait on receives */
819   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
820   count  = nrecvs; slen = 0;
821   while (count) {
822     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
823     /* unpack receives into our local space */
824     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
825     source[imdex]  = recv_status.MPI_SOURCE;
826     lens[imdex]    = n;
827     slen          += n;
828     count--;
829   }
830   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
831 
832   /* move the data into the send scatter */
833   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
834   count = 0;
835   for (i=0; i<nrecvs; i++) {
836     values = rvalues + i*nmax;
837     for (j=0; j<lens[i]; j++) {
838       lrows[count++] = values[j] - base;
839     }
840   }
841   ierr = PetscFree(rvalues);CHKERRQ(ierr);
842   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
843   ierr = PetscFree(owner);CHKERRQ(ierr);
844   ierr = PetscFree(nprocs);CHKERRQ(ierr);
845 
846   /* fix right hand side if needed */
847   if (x && b) {
848     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
849     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
850     for (i=0; i<slen; i++) {
851       bb[lrows[i]] = diag*xx[lrows[i]];
852     }
853     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
854     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
855   }
856   /*
857         Zero the required rows. If the "diagonal block" of the matrix
858      is square and the user wishes to set the diagonal, we use separate
859      code so that MatSetValues() is not called for each diagonal entry,
860      which would allocate new memory each time (many mallocs) and slow things down.
861 
862   */
863   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
864   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
865   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
866     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
867   } else if (diag != 0.0) {
868     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
869     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
870       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
871 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
872     }
873     for (i = 0; i < slen; i++) {
874       row  = lrows[i] + rstart;
875       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
876     }
877     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
878     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
879   } else {
880     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
881   }
882   ierr = PetscFree(lrows);CHKERRQ(ierr);
883 
884   /* wait on sends */
885   if (nsends) {
886     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
887     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
888     ierr = PetscFree(send_status);CHKERRQ(ierr);
889   }
890   ierr = PetscFree(send_waits);CHKERRQ(ierr);
891   ierr = PetscFree(svalues);CHKERRQ(ierr);
892   PetscFunctionReturn(0);
893 }
894 
895 #undef __FUNCT__
896 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
897 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
898 {
899   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
900   PetscErrorCode    ierr;
901   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
902   PetscInt          i,*owners = A->rmap->range;
903   PetscInt          *nprocs,j,idx,nsends;
904   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
905   PetscInt          *rvalues,count,base,slen,*source;
906   PetscInt          *lens,*lrows,*values,m;
907   MPI_Comm          comm = ((PetscObject)A)->comm;
908   MPI_Request       *send_waits,*recv_waits;
909   MPI_Status        recv_status,*send_status;
910   const PetscScalar *xx;
911   PetscScalar       *bb,*mask;
912   Vec               xmask,lmask;
913   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
914   const PetscInt    *aj, *ii,*ridx;
915   PetscScalar       *aa;
916 #if defined(PETSC_DEBUG)
917   PetscBool         found = PETSC_FALSE;
918 #endif
919 
920   PetscFunctionBegin;
921   /*  first count number of contributors to each processor */
922   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
923   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
924   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
925   j = 0;
926   for (i=0; i<N; i++) {
927     if (lastidx > (idx = rows[i])) j = 0;
928     lastidx = idx;
929     for (; j<size; j++) {
930       if (idx >= owners[j] && idx < owners[j+1]) {
931         nprocs[2*j]++;
932         nprocs[2*j+1] = 1;
933         owner[i] = j;
934 #if defined(PETSC_DEBUG)
935         found = PETSC_TRUE;
936 #endif
937         break;
938       }
939     }
940 #if defined(PETSC_DEBUG)
941     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
942     found = PETSC_FALSE;
943 #endif
944   }
945   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
946 
947   /* inform other processors of number of messages and max length*/
948   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
949 
950   /* post receives:   */
951   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
952   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
953   for (i=0; i<nrecvs; i++) {
954     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
955   }
956 
957   /* do sends:
958       1) starts[i] gives the starting index in svalues for stuff going to
959          the ith processor
960   */
961   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
963   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
964   starts[0] = 0;
965   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
966   for (i=0; i<N; i++) {
967     svalues[starts[owner[i]]++] = rows[i];
968   }
969 
970   starts[0] = 0;
971   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
972   count = 0;
973   for (i=0; i<size; i++) {
974     if (nprocs[2*i+1]) {
975       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
976     }
977   }
978   ierr = PetscFree(starts);CHKERRQ(ierr);
979 
980   base = owners[rank];
981 
982   /*  wait on receives */
983   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
984   count  = nrecvs; slen = 0;
985   while (count) {
986     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
987     /* unpack receives into our local space */
988     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
989     source[imdex]  = recv_status.MPI_SOURCE;
990     lens[imdex]    = n;
991     slen          += n;
992     count--;
993   }
994   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
995 
996   /* move the data into the send scatter */
997   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
998   count = 0;
999   for (i=0; i<nrecvs; i++) {
1000     values = rvalues + i*nmax;
1001     for (j=0; j<lens[i]; j++) {
1002       lrows[count++] = values[j] - base;
1003     }
1004   }
1005   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1006   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1007   ierr = PetscFree(owner);CHKERRQ(ierr);
1008   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1009   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1010 
1011   /* zero diagonal part of matrix */
1012   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1013 
1014   /* handle off diagonal part of matrix */
1015   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1016   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1017   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1018   for (i=0; i<slen; i++) {
1019     bb[lrows[i]] = 1;
1020   }
1021   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1022   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1024   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1025   if (x) {
1026     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1027     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1029     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1030   }
1031   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1032 
1033   /* remove zeroed rows of off diagonal matrix */
1034   ii = aij->i;
1035   for (i=0; i<slen; i++) {
1036     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1037   }
1038 
1039   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1040   if (aij->compressedrow.use){
1041     m    = aij->compressedrow.nrows;
1042     ii   = aij->compressedrow.i;
1043     ridx = aij->compressedrow.rindex;
1044     for (i=0; i<m; i++){
1045       n   = ii[i+1] - ii[i];
1046       aj  = aij->j + ii[i];
1047       aa  = aij->a + ii[i];
1048 
1049       for (j=0; j<n; j++) {
1050         if (PetscAbsScalar(mask[*aj])) {
1051           if (b) bb[*ridx] -= *aa*xx[*aj];
1052           *aa        = 0.0;
1053         }
1054         aa++;
1055         aj++;
1056       }
1057       ridx++;
1058     }
1059   } else { /* do not use compressed row format */
1060     m = l->B->rmap->n;
1061     for (i=0; i<m; i++) {
1062       n   = ii[i+1] - ii[i];
1063       aj  = aij->j + ii[i];
1064       aa  = aij->a + ii[i];
1065       for (j=0; j<n; j++) {
1066         if (PetscAbsScalar(mask[*aj])) {
1067           if (b) bb[i] -= *aa*xx[*aj];
1068           *aa    = 0.0;
1069         }
1070         aa++;
1071         aj++;
1072       }
1073     }
1074   }
1075   if (x) {
1076     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1077     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1078   }
1079   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1080   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1081   ierr = PetscFree(lrows);CHKERRQ(ierr);
1082 
1083   /* wait on sends */
1084   if (nsends) {
1085     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1086     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1087     ierr = PetscFree(send_status);CHKERRQ(ierr);
1088   }
1089   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1090   ierr = PetscFree(svalues);CHKERRQ(ierr);
1091 
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatMult_MPIAIJ"
1097 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1098 {
1099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1100   PetscErrorCode ierr;
1101   PetscInt       nt;
1102 
1103   PetscFunctionBegin;
1104   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1105   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1106   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1107   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1108   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1109   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1115 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1116 {
1117   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1118   PetscErrorCode ierr;
1119 
1120   PetscFunctionBegin;
1121   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1127 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1128 {
1129   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1130   PetscErrorCode ierr;
1131 
1132   PetscFunctionBegin;
1133   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1134   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1135   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1136   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1142 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1143 {
1144   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1145   PetscErrorCode ierr;
1146   PetscBool      merged;
1147 
1148   PetscFunctionBegin;
1149   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1150   /* do nondiagonal part */
1151   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1152   if (!merged) {
1153     /* send it on its way */
1154     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1155     /* do local part */
1156     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1157     /* receive remote parts: note this assumes the values are not actually */
1158     /* added in yy until the next line */
1159     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160   } else {
1161     /* do local part */
1162     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1163     /* send it on its way */
1164     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165     /* values actually were received in the Begin() but we need to call this nop */
1166     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1167   }
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 EXTERN_C_BEGIN
1172 #undef __FUNCT__
1173 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1174 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1175 {
1176   MPI_Comm       comm;
1177   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1178   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1179   IS             Me,Notme;
1180   PetscErrorCode ierr;
1181   PetscInt       M,N,first,last,*notme,i;
1182   PetscMPIInt    size;
1183 
1184   PetscFunctionBegin;
1185 
1186   /* Easy test: symmetric diagonal block */
1187   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1188   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1189   if (!*f) PetscFunctionReturn(0);
1190   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1191   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1192   if (size == 1) PetscFunctionReturn(0);
1193 
1194   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1195   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1196   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1197   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1198   for (i=0; i<first; i++) notme[i] = i;
1199   for (i=last; i<M; i++) notme[i-last+first] = i;
1200   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1201   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1202   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1203   Aoff = Aoffs[0];
1204   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1205   Boff = Boffs[0];
1206   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1207   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1208   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1209   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1210   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1211   ierr = PetscFree(notme);CHKERRQ(ierr);
1212   PetscFunctionReturn(0);
1213 }
1214 EXTERN_C_END
1215 
1216 #undef __FUNCT__
1217 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1218 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1219 {
1220   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1221   PetscErrorCode ierr;
1222 
1223   PetscFunctionBegin;
1224   /* do nondiagonal part */
1225   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1226   /* send it on its way */
1227   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1228   /* do local part */
1229   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1230   /* receive remote parts */
1231   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 /*
1236   This only works correctly for square matrices where the subblock A->A is the
1237    diagonal block
1238 */
1239 #undef __FUNCT__
1240 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1241 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1242 {
1243   PetscErrorCode ierr;
1244   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1245 
1246   PetscFunctionBegin;
1247   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1248   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1249   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatScale_MPIAIJ"
1255 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1256 {
1257   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1262   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatDestroy_MPIAIJ"
1268 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1269 {
1270   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274 #if defined(PETSC_USE_LOG)
1275   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1276 #endif
1277   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1278   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1279   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1280   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1281 #if defined (PETSC_USE_CTABLE)
1282   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1283 #else
1284   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1285 #endif
1286   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1287   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1288   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1289   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1290   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1291   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1292 
1293   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1307 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1308 {
1309   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1310   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1311   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1312   PetscErrorCode    ierr;
1313   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1314   int               fd;
1315   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1316   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1317   PetscScalar       *column_values;
1318   PetscInt          message_count,flowcontrolcount;
1319   FILE              *file;
1320 
1321   PetscFunctionBegin;
1322   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1323   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1324   nz   = A->nz + B->nz;
1325   if (!rank) {
1326     header[0] = MAT_FILE_CLASSID;
1327     header[1] = mat->rmap->N;
1328     header[2] = mat->cmap->N;
1329     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1330     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1331     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1332     /* get largest number of rows any processor has */
1333     rlen = mat->rmap->n;
1334     range = mat->rmap->range;
1335     for (i=1; i<size; i++) {
1336       rlen = PetscMax(rlen,range[i+1] - range[i]);
1337     }
1338   } else {
1339     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340     rlen = mat->rmap->n;
1341   }
1342 
1343   /* load up the local row counts */
1344   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1345   for (i=0; i<mat->rmap->n; i++) {
1346     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1347   }
1348 
1349   /* store the row lengths to the file */
1350   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1351   if (!rank) {
1352     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1353     for (i=1; i<size; i++) {
1354       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1355       rlen = range[i+1] - range[i];
1356       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1357       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1358     }
1359     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1360   } else {
1361     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1362     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1363     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1364   }
1365   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1366 
1367   /* load up the local column indices */
1368   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
1369   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1370   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1371   cnt  = 0;
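  /* Pack each local row in globally increasing column order: off-diagonal entries whose
     mapped column (via garray) falls below cstart come first, then the diagonal block
     shifted by cstart, then the remaining off-diagonal entries. */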
1372   for (i=0; i<mat->rmap->n; i++) {
1373     for (j=B->i[i]; j<B->i[i+1]; j++) {
1374       if ( (col = garray[B->j[j]]) > cstart) break;
1375       column_indices[cnt++] = col;
1376     }
1377     for (k=A->i[i]; k<A->i[i+1]; k++) {
1378       column_indices[cnt++] = A->j[k] + cstart;
1379     }
1380     for (; j<B->i[i+1]; j++) {
1381       column_indices[cnt++] = garray[B->j[j]];
1382     }
1383   }
1384   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1385 
1386   /* store the column indices to the file */
1387   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1388   if (!rank) {
1389     MPI_Status status;
1390     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1391     for (i=1; i<size; i++) {
1392       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1393       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1394       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1395       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1396       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1397     }
1398     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1399   } else {
1400     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1401     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1402     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1403     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1404   }
1405   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1406 
1407   /* load up the local column values */
1408   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1409   cnt  = 0;
1410   for (i=0; i<mat->rmap->n; i++) {
1411     for (j=B->i[i]; j<B->i[i+1]; j++) {
1412       if ( garray[B->j[j]] > cstart) break;
1413       column_values[cnt++] = B->a[j];
1414     }
1415     for (k=A->i[i]; k<A->i[i+1]; k++) {
1416       column_values[cnt++] = A->a[k];
1417     }
1418     for (; j<B->i[i+1]; j++) {
1419       column_values[cnt++] = B->a[j];
1420     }
1421   }
1422   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1423 
1424   /* store the column values to the file */
1425   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1426   if (!rank) {
1427     MPI_Status status;
1428     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1429     for (i=1; i<size; i++) {
1430       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1431       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1432       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1433       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1434       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1435     }
1436     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1437   } else {
1438     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1439     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1440     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1441     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1442   }
1443   ierr = PetscFree(column_values);CHKERRQ(ierr);
1444 
1445   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1446   if (file) {
1447     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1448   }
1449 
1450   PetscFunctionReturn(0);
1451 }
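/*
   Illustrative usage (a sketch, not part of this source file): the binary path above is
   normally reached through MatView() with a binary viewer, e.g.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The resulting file holds the header, the per-row lengths, the global column indices and
   the values, in that order, as written by the routine above; the filename "matrix.dat" is
   only an example.
*/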
1452 
1453 #undef __FUNCT__
1454 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1455 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1456 {
1457   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1458   PetscErrorCode    ierr;
1459   PetscMPIInt       rank = aij->rank,size = aij->size;
1460   PetscBool         isdraw,iascii,isbinary;
1461   PetscViewer       sviewer;
1462   PetscViewerFormat format;
1463 
1464   PetscFunctionBegin;
1465   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1466   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1467   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1468   if (iascii) {
1469     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1470     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1471       MatInfo    info;
1472       PetscBool  inodes;
1473 
1474       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1475       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1476       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1478       if (!inodes) {
1479         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1480 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1481       } else {
1482         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1483 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1484       }
1485       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1486       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1487       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1488       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1489       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1490       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1491       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1492       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1493       PetscFunctionReturn(0);
1494     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1495       PetscInt   inodecount,inodelimit,*inodes;
1496       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1497       if (inodes) {
1498         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1499       } else {
1500         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1501       }
1502       PetscFunctionReturn(0);
1503     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1504       PetscFunctionReturn(0);
1505     }
1506   } else if (isbinary) {
1507     if (size == 1) {
1508       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1509       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1510     } else {
1511       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1512     }
1513     PetscFunctionReturn(0);
1514   } else if (isdraw) {
1515     PetscDraw  draw;
1516     PetscBool  isnull;
1517     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1518     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1519   }
1520 
1521   if (size == 1) {
1522     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1523     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1524   } else {
1525     /* assemble the entire matrix onto first processor. */
1526     Mat         A;
1527     Mat_SeqAIJ  *Aloc;
1528     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1529     MatScalar   *a;
1530 
1531     if (mat->rmap->N > 1024) {
1532       PetscBool  flg = PETSC_FALSE;
1533 
1534       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1535       if (!flg) {
1536         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1537       }
1538     }
1539 
1540     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1541     if (!rank) {
1542       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1543     } else {
1544       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1545     }
1546     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1547     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1548     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1549     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1550     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1551 
1552     /* copy over the A part */
1553     Aloc = (Mat_SeqAIJ*)aij->A->data;
1554     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555     row = mat->rmap->rstart;
1556     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1557     for (i=0; i<m; i++) {
1558       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1559       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1560     }
1561     aj = Aloc->j;
1562     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1563 
1564     /* copy over the B part */
1565     Aloc = (Mat_SeqAIJ*)aij->B->data;
1566     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1567     row  = mat->rmap->rstart;
1568     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1569     ct   = cols;
1570     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1571     for (i=0; i<m; i++) {
1572       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1573       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1574     }
1575     ierr = PetscFree(ct);CHKERRQ(ierr);
1576     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1577     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1578     /*
1579        Everyone has to call to draw the matrix since the graphics waits are
1580        synchronized across all processors that share the PetscDraw object
1581     */
1582     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1583     if (!rank) {
1584       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1585       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1586       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1587       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1588     }
1589     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1590     ierr = MatDestroy(&A);CHKERRQ(ierr);
1591   }
1592   PetscFunctionReturn(0);
1593 }
1594 
1595 #undef __FUNCT__
1596 #define __FUNCT__ "MatView_MPIAIJ"
1597 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1598 {
1599   PetscErrorCode ierr;
1600   PetscBool      iascii,isdraw,issocket,isbinary;
1601 
1602   PetscFunctionBegin;
1603   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1604   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1605   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1606   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1607   if (iascii || isdraw || isbinary || issocket) {
1608     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1609   } else {
1610     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1611   }
1612   PetscFunctionReturn(0);
1613 }
1614 
1615 #undef __FUNCT__
1616 #define __FUNCT__ "MatSOR_MPIAIJ"
1617 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1618 {
1619   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1620   PetscErrorCode ierr;
1621   Vec            bb1 = 0;
1622   PetscBool      hasop;
1623 
1624   PetscFunctionBegin;
1625   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1626     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1627   }
1628 
1629   if (flag == SOR_APPLY_UPPER) {
1630     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1631     PetscFunctionReturn(0);
1632   }
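  /*
     For the SOR_LOCAL_* variants below, the sweep itself is applied only to the diagonal
     block mat->A; coupling to off-process unknowns is handled by scattering the current
     solution into mat->lvec and folding it into the right-hand side as bb1 = bb - B*x
     before each local sweep.
  */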
1633 
1634   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1635     if (flag & SOR_ZERO_INITIAL_GUESS) {
1636       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1637       its--;
1638     }
1639 
1640     while (its--) {
1641       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1642       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1643 
1644       /* update rhs: bb1 = bb - B*x */
1645       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1646       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1647 
1648       /* local sweep */
1649       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1650     }
1651   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1652     if (flag & SOR_ZERO_INITIAL_GUESS) {
1653       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1654       its--;
1655     }
1656     while (its--) {
1657       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1658       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1659 
1660       /* update rhs: bb1 = bb - B*x */
1661       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1662       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1663 
1664       /* local sweep */
1665       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1666     }
1667   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1668     if (flag & SOR_ZERO_INITIAL_GUESS) {
1669       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1670       its--;
1671     }
1672     while (its--) {
1673       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1674       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1675 
1676       /* update rhs: bb1 = bb - B*x */
1677       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1678       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1679 
1680       /* local sweep */
1681       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1682     }
1683   } else if (flag & SOR_EISENSTAT) {
1684     Vec         xx1;
1685 
1686     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1687     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1688 
1689     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1690     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1691     if (!mat->diag) {
1692       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1693       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1694     }
1695     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1696     if (hasop) {
1697       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1698     } else {
1699       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1700     }
1701     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1702 
1703     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1704 
1705     /* local sweep */
1706     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1707     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1708     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1709   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1710 
1711   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1712   PetscFunctionReturn(0);
1713 }
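/*
   Illustrative usage (a sketch, not part of this source file): one zero-initial-guess
   local symmetric sweep can be requested directly with

     ierr = MatSOR(mat,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   where b and x are the right-hand side and solution vectors; this is the form of call
   the routine above services in parallel.
*/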
1714 
1715 #undef __FUNCT__
1716 #define __FUNCT__ "MatPermute_MPIAIJ"
1717 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1718 {
1719   MPI_Comm       comm;
1720   PetscInt       first,local_rowsize,local_colsize;
1721   const PetscInt *rows;
1722   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1723   PetscErrorCode ierr;
1724 
1725   PetscFunctionBegin;
1726   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1727   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1728   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1729   /* collect the global row permutation and invert it */
1730   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1731   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1732   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1733   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1734   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1735   /* get the local target indices */
1736   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1737   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1738   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1739   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1740   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1741   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1742   /* the column permutation is so much easier;
1743      make a local version of 'colp' and invert it */
1744   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1745   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1746   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1747   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1748   /* now we just get the submatrix */
1749   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1750   /* clean up */
1751   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1752   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1753   PetscFunctionReturn(0);
1754 }
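/*
   Illustrative usage (a sketch, not part of this source file): given permutation index
   sets rowp and colp (for example from MatGetOrdering()),

     Mat Aperm;
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);

   The routine above tolerates sequential index sets by first making rowp collective on
   the matrix communicator.
*/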
1755 
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1758 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1759 {
1760   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1761   Mat            A = mat->A,B = mat->B;
1762   PetscErrorCode ierr;
1763   PetscReal      isend[5],irecv[5];
1764 
1765   PetscFunctionBegin;
1766   info->block_size     = 1.0;
1767   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1768   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1769   isend[3] = info->memory;  isend[4] = info->mallocs;
1770   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1771   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1772   isend[3] += info->memory;  isend[4] += info->mallocs;
1773   if (flag == MAT_LOCAL) {
1774     info->nz_used      = isend[0];
1775     info->nz_allocated = isend[1];
1776     info->nz_unneeded  = isend[2];
1777     info->memory       = isend[3];
1778     info->mallocs      = isend[4];
1779   } else if (flag == MAT_GLOBAL_MAX) {
1780     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1781     info->nz_used      = irecv[0];
1782     info->nz_allocated = irecv[1];
1783     info->nz_unneeded  = irecv[2];
1784     info->memory       = irecv[3];
1785     info->mallocs      = irecv[4];
1786   } else if (flag == MAT_GLOBAL_SUM) {
1787     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1788     info->nz_used      = irecv[0];
1789     info->nz_allocated = irecv[1];
1790     info->nz_unneeded  = irecv[2];
1791     info->memory       = irecv[3];
1792     info->mallocs      = irecv[4];
1793   }
1794   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1795   info->fill_ratio_needed = 0;
1796   info->factor_mallocs    = 0;
1797   PetscFunctionReturn(0);
1798 }
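/*
   Illustrative usage (a sketch, not part of this source file): global nonzero counts can
   be queried with

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %D\n",(PetscInt)info.nz_used);CHKERRQ(ierr);

   MAT_LOCAL and MAT_GLOBAL_MAX select the other reductions performed above.
*/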
1799 
1800 #undef __FUNCT__
1801 #define __FUNCT__ "MatSetOption_MPIAIJ"
1802 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1803 {
1804   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1805   PetscErrorCode ierr;
1806 
1807   PetscFunctionBegin;
1808   switch (op) {
1809   case MAT_NEW_NONZERO_LOCATIONS:
1810   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1811   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1812   case MAT_KEEP_NONZERO_PATTERN:
1813   case MAT_NEW_NONZERO_LOCATION_ERR:
1814   case MAT_USE_INODES:
1815   case MAT_IGNORE_ZERO_ENTRIES:
1816     MatCheckPreallocated(A,1);
1817     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1818     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1819     break;
1820   case MAT_ROW_ORIENTED:
1821     a->roworiented = flg;
1822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1823     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1824     break;
1825   case MAT_NEW_DIAGONALS:
1826     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1827     break;
1828   case MAT_IGNORE_OFF_PROC_ENTRIES:
1829     a->donotstash = flg;
1830     break;
1831   case MAT_SPD:
1832     A->spd_set                         = PETSC_TRUE;
1833     A->spd                             = flg;
1834     if (flg) {
1835       A->symmetric                     = PETSC_TRUE;
1836       A->structurally_symmetric        = PETSC_TRUE;
1837       A->symmetric_set                 = PETSC_TRUE;
1838       A->structurally_symmetric_set    = PETSC_TRUE;
1839     }
1840     break;
1841   case MAT_SYMMETRIC:
1842     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1843     break;
1844   case MAT_STRUCTURALLY_SYMMETRIC:
1845     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1846     break;
1847   case MAT_HERMITIAN:
1848     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1849     break;
1850   case MAT_SYMMETRY_ETERNAL:
1851     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1852     break;
1853   default:
1854     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1855   }
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatGetRow_MPIAIJ"
1861 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1862 {
1863   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1864   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1865   PetscErrorCode ierr;
1866   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1867   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1868   PetscInt       *cmap,*idx_p;
1869 
1870   PetscFunctionBegin;
1871   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1872   mat->getrowactive = PETSC_TRUE;
1873 
1874   if (!mat->rowvalues && (idx || v)) {
1875     /*
1876         allocate enough space to hold information from the longest row.
1877     */
1878     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1879     PetscInt   max = 1,tmp;
1880     for (i=0; i<matin->rmap->n; i++) {
1881       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1882       if (max < tmp) { max = tmp; }
1883     }
1884     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1885   }
1886 
1887   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1888   lrow = row - rstart;
1889 
1890   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1891   if (!v)   {pvA = 0; pvB = 0;}
1892   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1893   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1894   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1895   nztot = nzA + nzB;
1896 
1897   cmap  = mat->garray;
1898   if (v  || idx) {
1899     if (nztot) {
1900       /* Sort by increasing column numbers, assuming A and B already sorted */
1901       PetscInt imark = -1;
1902       if (v) {
1903         *v = v_p = mat->rowvalues;
1904         for (i=0; i<nzB; i++) {
1905           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1906           else break;
1907         }
1908         imark = i;
1909         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1910         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1911       }
1912       if (idx) {
1913         *idx = idx_p = mat->rowindices;
1914         if (imark > -1) {
1915           for (i=0; i<imark; i++) {
1916             idx_p[i] = cmap[cworkB[i]];
1917           }
1918         } else {
1919           for (i=0; i<nzB; i++) {
1920             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1921             else break;
1922           }
1923           imark = i;
1924         }
1925         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1926         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1927       }
1928     } else {
1929       if (idx) *idx = 0;
1930       if (v)   *v   = 0;
1931     }
1932   }
1933   *nz = nztot;
1934   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1935   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1941 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1942 {
1943   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1944 
1945   PetscFunctionBegin;
1946   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1947   aij->getrowactive = PETSC_FALSE;
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatNorm_MPIAIJ"
1953 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1954 {
1955   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1956   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1957   PetscErrorCode ierr;
1958   PetscInt       i,j,cstart = mat->cmap->rstart;
1959   PetscReal      sum = 0.0;
1960   MatScalar      *v;
1961 
1962   PetscFunctionBegin;
1963   if (aij->size == 1) {
1964     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1965   } else {
1966     if (type == NORM_FROBENIUS) {
1967       v = amat->a;
1968       for (i=0; i<amat->nz; i++) {
1969 #if defined(PETSC_USE_COMPLEX)
1970         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1971 #else
1972         sum += (*v)*(*v); v++;
1973 #endif
1974       }
1975       v = bmat->a;
1976       for (i=0; i<bmat->nz; i++) {
1977 #if defined(PETSC_USE_COMPLEX)
1978         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1979 #else
1980         sum += (*v)*(*v); v++;
1981 #endif
1982       }
1983       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1984       *norm = PetscSqrtReal(*norm);
1985     } else if (type == NORM_1) { /* max column norm */
1986       PetscReal *tmp,*tmp2;
1987       PetscInt  *jj,*garray = aij->garray;
1988       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1989       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1990       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1991       *norm = 0.0;
1992       v = amat->a; jj = amat->j;
1993       for (j=0; j<amat->nz; j++) {
1994         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1995       }
1996       v = bmat->a; jj = bmat->j;
1997       for (j=0; j<bmat->nz; j++) {
1998         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1999       }
2000       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2001       for (j=0; j<mat->cmap->N; j++) {
2002         if (tmp2[j] > *norm) *norm = tmp2[j];
2003       }
2004       ierr = PetscFree(tmp);CHKERRQ(ierr);
2005       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2006     } else if (type == NORM_INFINITY) { /* max row norm */
2007       PetscReal ntemp = 0.0;
2008       for (j=0; j<aij->A->rmap->n; j++) {
2009         v = amat->a + amat->i[j];
2010         sum = 0.0;
2011         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2012           sum += PetscAbsScalar(*v); v++;
2013         }
2014         v = bmat->a + bmat->i[j];
2015         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2016           sum += PetscAbsScalar(*v); v++;
2017         }
2018         if (sum > ntemp) ntemp = sum;
2019       }
2020       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2021     } else {
2022       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2023     }
2024   }
2025   PetscFunctionReturn(0);
2026 }
2027 
2028 #undef __FUNCT__
2029 #define __FUNCT__ "MatTranspose_MPIAIJ"
2030 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2031 {
2032   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2033   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2034   PetscErrorCode ierr;
2035   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2036   PetscInt       cstart=A->cmap->rstart,ncol;
2037   Mat            B;
2038   MatScalar      *array;
2039 
2040   PetscFunctionBegin;
2041   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2042 
2043   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2044   ai = Aloc->i; aj = Aloc->j;
2045   bi = Bloc->i; bj = Bloc->j;
2046   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2047     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2048     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2049     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2050     for (i=0; i<ai[ma]; i++){
2051       d_nnz[aj[i]] ++;
2052       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2053     }
2054 
2055     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2056     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2057     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2058     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2059     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2060     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2061     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2062   } else {
2063     B = *matout;
2064     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2065     for (i=0; i<ai[ma]; i++){
2066       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2067     }
2068   }
2069 
2070   /* copy over the A part */
2071   array = Aloc->a;
2072   row = A->rmap->rstart;
2073   for (i=0; i<ma; i++) {
2074     ncol = ai[i+1]-ai[i];
2075     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2076     row++; array += ncol; aj += ncol;
2077   }
2078   aj = Aloc->j;
2079   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2080 
2081   /* copy over the B part */
2082   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2083   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2084   array = Bloc->a;
2085   row = A->rmap->rstart;
2086   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2087   cols_tmp = cols;
2088   for (i=0; i<mb; i++) {
2089     ncol = bi[i+1]-bi[i];
2090     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2091     row++; array += ncol; cols_tmp += ncol;
2092   }
2093   ierr = PetscFree(cols);CHKERRQ(ierr);
2094 
2095   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2096   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2097   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2098     *matout = B;
2099   } else {
2100     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2101   }
2102   PetscFunctionReturn(0);
2103 }
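/*
   Illustrative usage (a sketch, not part of this source file):

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);

   In-place transposition, MatTranspose(A,MAT_REUSE_MATRIX,&A), is only permitted for
   square matrices, as checked at the top of the routine above.
*/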
2104 
2105 #undef __FUNCT__
2106 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2107 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2108 {
2109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2110   Mat            a = aij->A,b = aij->B;
2111   PetscErrorCode ierr;
2112   PetscInt       s1,s2,s3;
2113 
2114   PetscFunctionBegin;
2115   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2116   if (rr) {
2117     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2118     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2119     /* Overlap communication with computation. */
2120     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2121   }
2122   if (ll) {
2123     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2124     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2125     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2126   }
2127   /* scale the diagonal block */
2128   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2129 
2130   if (rr) {
2131     /* Do a scatter end and then right scale the off-diagonal block */
2132     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2133     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2134   }
2135 
2136   PetscFunctionReturn(0);
2137 }
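/*
   Illustrative usage (a sketch, not part of this source file): scale rows by l and
   columns by r, i.e. A <- diag(l) A diag(r):

     ierr = MatDiagonalScale(mat,l,r);CHKERRQ(ierr);

   Either vector may be PETSC_NULL; the routine above overlaps the scatter of r with the
   left scaling so the communication is hidden behind local work.
*/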
2138 
2139 #undef __FUNCT__
2140 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2141 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2142 {
2143   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2144   PetscErrorCode ierr;
2145 
2146   PetscFunctionBegin;
2147   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatEqual_MPIAIJ"
2153 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2154 {
2155   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2156   Mat            a,b,c,d;
2157   PetscBool      flg;
2158   PetscErrorCode ierr;
2159 
2160   PetscFunctionBegin;
2161   a = matA->A; b = matA->B;
2162   c = matB->A; d = matB->B;
2163 
2164   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2165   if (flg) {
2166     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2167   }
2168   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2169   PetscFunctionReturn(0);
2170 }
2171 
2172 #undef __FUNCT__
2173 #define __FUNCT__ "MatCopy_MPIAIJ"
2174 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2175 {
2176   PetscErrorCode ierr;
2177   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2178   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2179 
2180   PetscFunctionBegin;
2181   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2182   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2183     /* because of the column compression in the off-processor part of the matrix a->B,
2184        the number of columns in a->B and b->B may be different, hence we cannot call
2185        the MatCopy() directly on the two parts. If need be, we can provide a more
2186        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2187        then copying the submatrices */
2188     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2189   } else {
2190     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2191     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2192   }
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 #undef __FUNCT__
2197 #define __FUNCT__ "MatSetUp_MPIAIJ"
2198 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2199 {
2200   PetscErrorCode ierr;
2201 
2202   PetscFunctionBegin;
2203   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2204   PetscFunctionReturn(0);
2205 }
2206 
2207 #undef __FUNCT__
2208 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2209 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2210 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2211 {
2212   PetscInt          i,m=Y->rmap->N;
2213   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2214   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2215   const PetscInt    *xi = x->i,*yi = y->i;
2216 
2217   PetscFunctionBegin;
2218   /* Set the number of nonzeros in the new matrix */
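  /* nnz[i] is the size of the union of the column sets of row i of X and Y, obtained by a
     two-pointer merge over the (sorted) global column indices xltog[xj[]] and yltog[yj[]];
     columns present in both matrices are counted once. */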
2219   for(i=0; i<m; i++) {
2220     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2221     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2222     nnz[i] = 0;
2223     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2224       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2225       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2226       nnz[i]++;
2227     }
2228     for (; k<nzy; k++) nnz[i]++;
2229   }
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatAXPY_MPIAIJ"
2235 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   PetscInt       i;
2239   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2240   PetscBLASInt   bnz,one=1;
2241   Mat_SeqAIJ     *x,*y;
2242 
2243   PetscFunctionBegin;
2244   if (str == SAME_NONZERO_PATTERN) {
2245     PetscScalar alpha = a;
2246     x = (Mat_SeqAIJ *)xx->A->data;
2247     y = (Mat_SeqAIJ *)yy->A->data;
2248     bnz = PetscBLASIntCast(x->nz);
2249     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2250     x = (Mat_SeqAIJ *)xx->B->data;
2251     y = (Mat_SeqAIJ *)yy->B->data;
2252     bnz = PetscBLASIntCast(x->nz);
2253     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2254   } else if (str == SUBSET_NONZERO_PATTERN) {
2255     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2256 
2257     x = (Mat_SeqAIJ *)xx->B->data;
2258     y = (Mat_SeqAIJ *)yy->B->data;
2259     if (y->xtoy && y->XtoY != xx->B) {
2260       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2261       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2262     }
2263     if (!y->xtoy) { /* get xtoy */
2264       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2265       y->XtoY = xx->B;
2266       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2267     }
2268     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2269   } else {
2270     Mat B;
2271     PetscInt *nnz_d,*nnz_o;
2272     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2273     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2274     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2275     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2276     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2277     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2278     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2279     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2280     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2281     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2282     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2283     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2284     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2285     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2286   }
2287   PetscFunctionReturn(0);
2288 }
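/*
   Illustrative usage (a sketch, not part of this source file): Y <- Y + a*X, with the
   cheapest branch above taken when the caller can guarantee the sparsity relationship:

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   With DIFFERENT_NONZERO_PATTERN the routine preallocates a new matrix from the merged
   patterns and replaces Y's header with it.
*/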
2289 
2290 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2291 
2292 #undef __FUNCT__
2293 #define __FUNCT__ "MatConjugate_MPIAIJ"
2294 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2295 {
2296 #if defined(PETSC_USE_COMPLEX)
2297   PetscErrorCode ierr;
2298   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2299 
2300   PetscFunctionBegin;
2301   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2302   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2303 #else
2304   PetscFunctionBegin;
2305 #endif
2306   PetscFunctionReturn(0);
2307 }
2308 
2309 #undef __FUNCT__
2310 #define __FUNCT__ "MatRealPart_MPIAIJ"
2311 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2312 {
2313   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2314   PetscErrorCode ierr;
2315 
2316   PetscFunctionBegin;
2317   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2318   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 #undef __FUNCT__
2323 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2324 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2325 {
2326   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2327   PetscErrorCode ierr;
2328 
2329   PetscFunctionBegin;
2330   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2331   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2332   PetscFunctionReturn(0);
2333 }
2334 
2335 #if defined(PETSC_HAVE_PBGL)
2336 
2337 #include <boost/parallel/mpi/bsp_process_group.hpp>
2338 #include <boost/graph/distributed/ilu_default_graph.hpp>
2339 #include <boost/graph/distributed/ilu_0_block.hpp>
2340 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2341 #include <boost/graph/distributed/petsc/interface.hpp>
2342 #include <boost/multi_array.hpp>
2343 #include <boost/parallel/distributed_property_map.hpp>
2344 
2345 #undef __FUNCT__
2346 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2347 /*
2348   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2349 */
2350 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2351 {
2352   namespace petsc = boost::distributed::petsc;
2353 
2354   namespace graph_dist = boost::graph::distributed;
2355   using boost::graph::distributed::ilu_default::process_group_type;
2356   using boost::graph::ilu_permuted;
2357 
2358   PetscBool       row_identity, col_identity;
2359   PetscContainer  c;
2360   PetscInt        m, n, M, N;
2361   PetscErrorCode  ierr;
2362 
2363   PetscFunctionBegin;
2364   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2365   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2366   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2367   if (!row_identity || !col_identity) {
2368     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2369   }
2370 
2371   process_group_type pg;
2372   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2373   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2374   lgraph_type&   level_graph = *lgraph_p;
2375   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2376 
2377   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2378   ilu_permuted(level_graph);
2379 
2380   /* put together the new matrix */
2381   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2382   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2383   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2384   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2385   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2386   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2387   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2388   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2389 
2390   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2391   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2392   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2393   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 #undef __FUNCT__
2398 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2399 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2400 {
2401   PetscFunctionBegin;
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 #undef __FUNCT__
2406 #define __FUNCT__ "MatSolve_MPIAIJ"
2407 /*
2408   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2409 */
2410 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2411 {
2412   namespace graph_dist = boost::graph::distributed;
2413 
2414   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2415   lgraph_type*   lgraph_p;
2416   PetscContainer c;
2417   PetscErrorCode ierr;
2418 
2419   PetscFunctionBegin;
2420   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2421   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2422   ierr = VecCopy(b, x);CHKERRQ(ierr);
2423 
2424   PetscScalar* array_x;
2425   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2426   PetscInt sx;
2427   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2428 
2429   PetscScalar* array_b;
2430   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2431   PetscInt sb;
2432   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2433 
2434   lgraph_type&   level_graph = *lgraph_p;
2435   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2436 
2437   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2438   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2439                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2440 
2441   typedef boost::iterator_property_map<array_ref_type::iterator,
2442                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2443   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2444                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2445 
2446   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2447 
2448   PetscFunctionReturn(0);
2449 }
2450 #endif
2451 
2452 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2453   PetscInt       nzlocal,nsends,nrecvs;
2454   PetscMPIInt    *send_rank,*recv_rank;
2455   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2456   PetscScalar    *sbuf_a,**rbuf_a;
2457   PetscErrorCode (*Destroy)(Mat);
2458 } Mat_Redundant;
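/* send_rank/recv_rank record the destination and source processes of the redundant copy;
   sbuf_j holds the packed row pointers followed by the column indices and sbuf_a the
   matching values, with rbuf_j/rbuf_a the per-source receive buffers; Destroy keeps the
   matrix's original destroy routine so it can be restored in MatDestroy_MatRedundant(). */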
2459 
2460 #undef __FUNCT__
2461 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2462 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2463 {
2464   PetscErrorCode       ierr;
2465   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2466   PetscInt             i;
2467 
2468   PetscFunctionBegin;
2469   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2470   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2471   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2472   for (i=0; i<redund->nrecvs; i++){
2473     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2474     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2475   }
2476   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2477   ierr = PetscFree(redund);CHKERRQ(ierr);
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatDestroy_MatRedundant"
2483 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2484 {
2485   PetscErrorCode  ierr;
2486   PetscContainer  container;
2487   Mat_Redundant   *redund=PETSC_NULL;
2488 
2489   PetscFunctionBegin;
2490   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2491   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2492   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2493   A->ops->destroy = redund->Destroy;
2494   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2495   if (A->ops->destroy) {
2496     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2497   }
2498   PetscFunctionReturn(0);
2499 }
2500 
2501 #undef __FUNCT__
2502 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2503 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2504 {
2505   PetscMPIInt    rank,size;
2506   MPI_Comm       comm=((PetscObject)mat)->comm;
2507   PetscErrorCode ierr;
2508   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2509   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2510   PetscInt       *rowrange=mat->rmap->range;
2511   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2512   Mat            A=aij->A,B=aij->B,C=*matredundant;
2513   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2514   PetscScalar    *sbuf_a;
2515   PetscInt       nzlocal=a->nz+b->nz;
2516   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2517   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2518   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2519   MatScalar      *aworkA,*aworkB;
2520   PetscScalar    *vals;
2521   PetscMPIInt    tag1,tag2,tag3,imdex;
2522   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2523                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2524   MPI_Status     recv_status,*send_status;
2525   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2526   PetscInt       **rbuf_j=PETSC_NULL;
2527   PetscScalar    **rbuf_a=PETSC_NULL;
2528   Mat_Redundant  *redund=PETSC_NULL;
2529   PetscContainer container;
2530 
2531   PetscFunctionBegin;
2532   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2533   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2534 
2535   if (reuse == MAT_REUSE_MATRIX) {
2536     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2537     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2538     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2539     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2540     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2541     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2542     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2543     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2544 
2545     nsends    = redund->nsends;
2546     nrecvs    = redund->nrecvs;
2547     send_rank = redund->send_rank;
2548     recv_rank = redund->recv_rank;
2549     sbuf_nz   = redund->sbuf_nz;
2550     rbuf_nz   = redund->rbuf_nz;
2551     sbuf_j    = redund->sbuf_j;
2552     sbuf_a    = redund->sbuf_a;
2553     rbuf_j    = redund->rbuf_j;
2554     rbuf_a    = redund->rbuf_a;
2555   }
2556 
2557   if (reuse == MAT_INITIAL_MATRIX){
2558     PetscMPIInt  subrank,subsize;
2559     PetscInt     nleftover,np_subcomm;
2560     /* get the destination processors' id send_rank, nsends and nrecvs */
2561     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2562     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2563     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2564     np_subcomm = size/nsubcomm;
2565     nleftover  = size - nsubcomm*np_subcomm;
2566     nsends = 0; nrecvs = 0;
2567     for (i=0; i<size; i++){ /* i=rank*/
2568       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2569         send_rank[nsends] = i; nsends++;
2570         recv_rank[nrecvs++] = i;
2571       }
2572     }
2573     if (rank >= size - nleftover){/* this proc is a leftover processor */
2574       i = size-nleftover-1;
2575       j = 0;
2576       while (j < nsubcomm - nleftover){
2577         send_rank[nsends++] = i;
2578         i--; j++;
2579       }
2580     }
2581 
2582     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2583       for (i=0; i<nleftover; i++){
2584         recv_rank[nrecvs++] = size-nleftover+i;
2585       }
2586     }
2587 
2588     /* allocate sbuf_j, sbuf_a */
2589     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2590     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2591     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2592   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2593 
2594   /* copy mat's local entries into the buffers */
2595   if (reuse == MAT_INITIAL_MATRIX){
2596     rownz_max = 0;
2597     rptr = sbuf_j;
2598     cols = sbuf_j + rend-rstart + 1;
2599     vals = sbuf_a;
2600     rptr[0] = 0;
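    /* Each local row is packed in global column order: off-diagonal entries whose mapped
       column is below cstart, then the diagonal block (shifted by cstart), then the
       off-diagonal entries at or beyond cend; rptr accumulates CSR-style offsets into
       cols/vals. */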
2601     for (i=0; i<rend-rstart; i++){
2602       row = i + rstart;
2603       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2604       ncols  = nzA + nzB;
2605       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2606       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2607       /* load the column indices for this row into cols */
2608       lwrite = 0;
2609       for (l=0; l<nzB; l++) {
2610         if ((ctmp = bmap[cworkB[l]]) < cstart){
2611           vals[lwrite]   = aworkB[l];
2612           cols[lwrite++] = ctmp;
2613         }
2614       }
2615       for (l=0; l<nzA; l++){
2616         vals[lwrite]   = aworkA[l];
2617         cols[lwrite++] = cstart + cworkA[l];
2618       }
2619       for (l=0; l<nzB; l++) {
2620         if ((ctmp = bmap[cworkB[l]]) >= cend){
2621           vals[lwrite]   = aworkB[l];
2622           cols[lwrite++] = ctmp;
2623         }
2624       }
2625       vals += ncols;
2626       cols += ncols;
2627       rptr[i+1] = rptr[i] + ncols;
2628       if (rownz_max < ncols) rownz_max = ncols;
2629     }
2630     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2631   } else { /* only copy matrix values into sbuf_a */
2632     rptr = sbuf_j;
2633     vals = sbuf_a;
2634     rptr[0] = 0;
2635     for (i=0; i<rend-rstart; i++){
2636       row = i + rstart;
2637       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2638       ncols  = nzA + nzB;
2639       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2640       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2641       lwrite = 0;
2642       for (l=0; l<nzB; l++) {
2643         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2644       }
2645       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2646       for (l=0; l<nzB; l++) {
2647         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2648       }
2649       vals += ncols;
2650       rptr[i+1] = rptr[i] + ncols;
2651     }
2652   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2653 
2654   /* send nzlocal to others, and recv other's nzlocal */
2655   /*--------------------------------------------------*/
2656   if (reuse == MAT_INITIAL_MATRIX){
2657     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2658     s_waits2 = s_waits3 + nsends;
2659     s_waits1 = s_waits2 + nsends;
2660     r_waits1 = s_waits1 + nsends;
2661     r_waits2 = r_waits1 + nrecvs;
2662     r_waits3 = r_waits2 + nrecvs;
2663   } else {
2664     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2665     r_waits3 = s_waits3 + nsends;
2666   }
2667 
2668   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2669   if (reuse == MAT_INITIAL_MATRIX){
2670     /* get new tags to keep the communication clean */
2671     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2672     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2673     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2674 
2675     /* post receives of other's nzlocal */
2676     for (i=0; i<nrecvs; i++){
2677       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2678     }
2679     /* send nzlocal to others */
2680     for (i=0; i<nsends; i++){
2681       sbuf_nz[i] = nzlocal;
2682       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2683     }
2684     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2685     count = nrecvs;
2686     while (count) {
2687       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2688       recv_rank[imdex] = recv_status.MPI_SOURCE;
2689       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2690       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2691 
2692       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2693       rbuf_nz[imdex] += i + 2;
2694       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2695       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2696       count--;
2697     }
2698     /* wait on sends of nzlocal */
2699     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2700     /* send mat->i,j to others, and recv from other's */
2701     /*------------------------------------------------*/
2702     for (i=0; i<nsends; i++){
2703       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2704       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2705     }
2706     /* wait on receives of mat->i,j */
2707     /*------------------------------*/
2708     count = nrecvs;
2709     while (count) {
2710       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2711       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2712       count--;
2713     }
2714     /* wait on sends of mat->i,j */
2715     /*---------------------------*/
2716     if (nsends) {
2717       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2718     }
2719   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2720 
2721   /* post receives, send and receive mat->a */
2722   /*----------------------------------------*/
2723   for (imdex=0; imdex<nrecvs; imdex++) {
2724     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2725   }
2726   for (i=0; i<nsends; i++){
2727     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2728   }
2729   count = nrecvs;
2730   while (count) {
2731     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2732     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2733     count--;
2734   }
2735   if (nsends) {
2736     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2737   }
2738 
2739   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2740 
2741   /* create redundant matrix */
2742   /*-------------------------*/
2743   if (reuse == MAT_INITIAL_MATRIX){
2744     /* compute rownz_max for preallocation */
2745     for (imdex=0; imdex<nrecvs; imdex++){
2746       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2747       rptr = rbuf_j[imdex];
2748       for (i=0; i<j; i++){
2749         ncols = rptr[i+1] - rptr[i];
2750         if (rownz_max < ncols) rownz_max = ncols;
2751       }
2752     }
2753 
2754     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2755     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2756     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2757     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2758     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2759     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2760   } else {
2761     C = *matredundant;
2762   }
2763 
2764   /* insert local matrix entries */
2765   rptr = sbuf_j;
2766   cols = sbuf_j + rend-rstart + 1;
2767   vals = sbuf_a;
2768   for (i=0; i<rend-rstart; i++){
2769     row   = i + rstart;
2770     ncols = rptr[i+1] - rptr[i];
2771     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2772     vals += ncols;
2773     cols += ncols;
2774   }
2775   /* insert received matrix entries */
2776   for (imdex=0; imdex<nrecvs; imdex++){
2777     rstart = rowrange[recv_rank[imdex]];
2778     rend   = rowrange[recv_rank[imdex]+1];
2779     rptr = rbuf_j[imdex];
2780     cols = rbuf_j[imdex] + rend-rstart + 1;
2781     vals = rbuf_a[imdex];
2782     for (i=0; i<rend-rstart; i++){
2783       row   = i + rstart;
2784       ncols = rptr[i+1] - rptr[i];
2785       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2786       vals += ncols;
2787       cols += ncols;
2788     }
2789   }
2790   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2791   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2792   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2793   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2794   if (reuse == MAT_INITIAL_MATRIX) {
2795     PetscContainer container;
2796     *matredundant = C;
2797     /* create a supporting struct and attach it to C for reuse */
2798     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2799     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2800     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2801     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2802     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2803     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2804 
2805     redund->nzlocal = nzlocal;
2806     redund->nsends  = nsends;
2807     redund->nrecvs  = nrecvs;
2808     redund->send_rank = send_rank;
2809     redund->recv_rank = recv_rank;
2810     redund->sbuf_nz = sbuf_nz;
2811     redund->rbuf_nz = rbuf_nz;
2812     redund->sbuf_j  = sbuf_j;
2813     redund->sbuf_a  = sbuf_a;
2814     redund->rbuf_j  = rbuf_j;
2815     redund->rbuf_a  = rbuf_a;
2816 
2817     redund->Destroy = C->ops->destroy;
2818     C->ops->destroy = MatDestroy_MatRedundant;
2819   }
2820   PetscFunctionReturn(0);
2821 }
2822 
2823 #undef __FUNCT__
2824 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2825 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2826 {
2827   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2828   PetscErrorCode ierr;
2829   PetscInt       i,*idxb = 0;
2830   PetscScalar    *va,*vb;
2831   Vec            vtmp;
2832 
2833   PetscFunctionBegin;
2834   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2835   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2836   if (idx) {
2837     for (i=0; i<A->rmap->n; i++) {
2838       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2839     }
2840   }
2841 
2842   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2843   if (idx) {
2844     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2845   }
2846   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2847   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2848 
2849   for (i=0; i<A->rmap->n; i++){
2850     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2851       va[i] = vb[i];
2852       if (idx) idx[i] = a->garray[idxb[i]];
2853     }
2854   }
2855 
2856   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2857   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2858   ierr = PetscFree(idxb);CHKERRQ(ierr);
2859   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2860   PetscFunctionReturn(0);
2861 }
2862 
2863 #undef __FUNCT__
2864 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2865 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2866 {
2867   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2868   PetscErrorCode ierr;
2869   PetscInt       i,*idxb = 0;
2870   PetscScalar    *va,*vb;
2871   Vec            vtmp;
2872 
2873   PetscFunctionBegin;
2874   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2875   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2876   if (idx) {
2877     for (i=0; i<A->rmap->n; i++) {
2878       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2879     }
2880   }
2881 
2882   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2883   if (idx) {
2884     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2885   }
2886   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2887   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2888 
2889   for (i=0; i<A->rmap->n; i++){
2890     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2891       va[i] = vb[i];
2892       if (idx) idx[i] = a->garray[idxb[i]];
2893     }
2894   }
2895 
2896   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2897   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2898   ierr = PetscFree(idxb);CHKERRQ(ierr);
2899   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2900   PetscFunctionReturn(0);
2901 }
2902 
2903 #undef __FUNCT__
2904 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2905 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2906 {
2907   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2908   PetscInt       n      = A->rmap->n;
2909   PetscInt       cstart = A->cmap->rstart;
2910   PetscInt      *cmap   = mat->garray;
2911   PetscInt      *diagIdx, *offdiagIdx;
2912   Vec            diagV, offdiagV;
2913   PetscScalar   *a, *diagA, *offdiagA;
2914   PetscInt       r;
2915   PetscErrorCode ierr;
2916 
2917   PetscFunctionBegin;
2918   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2919   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2920   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2921   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2922   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2923   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2924   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2925   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2926   for(r = 0; r < n; ++r) {
2927     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2928       a[r]   = diagA[r];
2929       idx[r] = cstart + diagIdx[r];
2930     } else {
2931       a[r]   = offdiagA[r];
2932       idx[r] = cmap[offdiagIdx[r]];
2933     }
2934   }
2935   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2936   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2937   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2938   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2939   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2940   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2941   PetscFunctionReturn(0);
2942 }
2943 
2944 #undef __FUNCT__
2945 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2946 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2947 {
2948   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2949   PetscInt       n      = A->rmap->n;
2950   PetscInt       cstart = A->cmap->rstart;
2951   PetscInt      *cmap   = mat->garray;
2952   PetscInt      *diagIdx, *offdiagIdx;
2953   Vec            diagV, offdiagV;
2954   PetscScalar   *a, *diagA, *offdiagA;
2955   PetscInt       r;
2956   PetscErrorCode ierr;
2957 
2958   PetscFunctionBegin;
2959   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2960   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2961   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2962   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2963   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2964   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2965   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2966   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2967   for(r = 0; r < n; ++r) {
2968     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2969       a[r]   = diagA[r];
2970       idx[r] = cstart + diagIdx[r];
2971     } else {
2972       a[r]   = offdiagA[r];
2973       idx[r] = cmap[offdiagIdx[r]];
2974     }
2975   }
2976   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2977   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2978   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2979   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2980   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2981   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2982   PetscFunctionReturn(0);
2983 }
2984 
2985 #undef __FUNCT__
2986 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2987 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2988 {
2989   PetscErrorCode ierr;
2990   Mat            *dummy;
2991 
2992   PetscFunctionBegin;
2993   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2994   *newmat = *dummy;
2995   ierr = PetscFree(dummy);CHKERRQ(ierr);
2996   PetscFunctionReturn(0);
2997 }
2998 
2999 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3000 
3001 #undef __FUNCT__
3002 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3003 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3004 {
3005   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3006   PetscErrorCode ierr;
3007 
3008   PetscFunctionBegin;
3009   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3010   PetscFunctionReturn(0);
3011 }
3012 
3013 
3014 /* -------------------------------------------------------------------*/
3015 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3016        MatGetRow_MPIAIJ,
3017        MatRestoreRow_MPIAIJ,
3018        MatMult_MPIAIJ,
3019 /* 4*/ MatMultAdd_MPIAIJ,
3020        MatMultTranspose_MPIAIJ,
3021        MatMultTransposeAdd_MPIAIJ,
3022 #ifdef PETSC_HAVE_PBGL
3023        MatSolve_MPIAIJ,
3024 #else
3025        0,
3026 #endif
3027        0,
3028        0,
3029 /*10*/ 0,
3030        0,
3031        0,
3032        MatSOR_MPIAIJ,
3033        MatTranspose_MPIAIJ,
3034 /*15*/ MatGetInfo_MPIAIJ,
3035        MatEqual_MPIAIJ,
3036        MatGetDiagonal_MPIAIJ,
3037        MatDiagonalScale_MPIAIJ,
3038        MatNorm_MPIAIJ,
3039 /*20*/ MatAssemblyBegin_MPIAIJ,
3040        MatAssemblyEnd_MPIAIJ,
3041        MatSetOption_MPIAIJ,
3042        MatZeroEntries_MPIAIJ,
3043 /*24*/ MatZeroRows_MPIAIJ,
3044        0,
3045 #ifdef PETSC_HAVE_PBGL
3046        0,
3047 #else
3048        0,
3049 #endif
3050        0,
3051        0,
3052 /*29*/ MatSetUp_MPIAIJ,
3053 #ifdef PETSC_HAVE_PBGL
3054        0,
3055 #else
3056        0,
3057 #endif
3058        0,
3059        0,
3060        0,
3061 /*34*/ MatDuplicate_MPIAIJ,
3062        0,
3063        0,
3064        0,
3065        0,
3066 /*39*/ MatAXPY_MPIAIJ,
3067        MatGetSubMatrices_MPIAIJ,
3068        MatIncreaseOverlap_MPIAIJ,
3069        MatGetValues_MPIAIJ,
3070        MatCopy_MPIAIJ,
3071 /*44*/ MatGetRowMax_MPIAIJ,
3072        MatScale_MPIAIJ,
3073        0,
3074        0,
3075        MatZeroRowsColumns_MPIAIJ,
3076 /*49*/ 0,
3077        0,
3078        0,
3079        0,
3080        0,
3081 /*54*/ MatFDColoringCreate_MPIAIJ,
3082        0,
3083        MatSetUnfactored_MPIAIJ,
3084        0, /* MatPermute_MPIAIJ, impl currently broken */
3085        0,
3086 /*59*/ MatGetSubMatrix_MPIAIJ,
3087        MatDestroy_MPIAIJ,
3088        MatView_MPIAIJ,
3089        0,
3090        0,
3091 /*64*/ 0,
3092        0,
3093        0,
3094        0,
3095        0,
3096 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3097        MatGetRowMinAbs_MPIAIJ,
3098        0,
3099        MatSetColoring_MPIAIJ,
3100 #if defined(PETSC_HAVE_ADIC)
3101        MatSetValuesAdic_MPIAIJ,
3102 #else
3103        0,
3104 #endif
3105        MatSetValuesAdifor_MPIAIJ,
3106 /*75*/ MatFDColoringApply_AIJ,
3107        0,
3108        0,
3109        0,
3110        MatFindZeroDiagonals_MPIAIJ,
3111 /*80*/ 0,
3112        0,
3113        0,
3114 /*83*/ MatLoad_MPIAIJ,
3115        0,
3116        0,
3117        0,
3118        0,
3119        0,
3120 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3121        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3122        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3123        MatPtAP_Basic,
3124        MatPtAPSymbolic_MPIAIJ,
3125 /*94*/ MatPtAPNumeric_MPIAIJ,
3126        0,
3127        0,
3128        0,
3129        0,
3130 /*99*/ 0,
3131        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3132        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3133        MatConjugate_MPIAIJ,
3134        0,
3135 /*104*/MatSetValuesRow_MPIAIJ,
3136        MatRealPart_MPIAIJ,
3137        MatImaginaryPart_MPIAIJ,
3138        0,
3139        0,
3140 /*109*/0,
3141        MatGetRedundantMatrix_MPIAIJ,
3142        MatGetRowMin_MPIAIJ,
3143        0,
3144        0,
3145 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3146        0,
3147        0,
3148        0,
3149        0,
3150 /*119*/0,
3151        0,
3152        0,
3153        0,
3154        MatGetMultiProcBlock_MPIAIJ,
3155 /*124*/MatFindNonzeroRows_MPIAIJ,
3156        MatGetColumnNorms_MPIAIJ,
3157        MatInvertBlockDiagonal_MPIAIJ,
3158        0,
3159        MatGetSubMatricesParallel_MPIAIJ,
3160 /*129*/0,
3161        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3162        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3163        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3164        0,
3165 /*134*/0,
3166        0,
3167        0,
3168        0,
3169        0
3170 };
3171 
3172 /* ----------------------------------------------------------------------------------------*/
3173 
3174 EXTERN_C_BEGIN
3175 #undef __FUNCT__
3176 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3177 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3178 {
3179   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3180   PetscErrorCode ierr;
3181 
3182   PetscFunctionBegin;
3183   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3184   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3185   PetscFunctionReturn(0);
3186 }
3187 EXTERN_C_END
3188 
3189 EXTERN_C_BEGIN
3190 #undef __FUNCT__
3191 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3192 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3193 {
3194   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3195   PetscErrorCode ierr;
3196 
3197   PetscFunctionBegin;
3198   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3199   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3200   PetscFunctionReturn(0);
3201 }
3202 EXTERN_C_END
3203 
3204 EXTERN_C_BEGIN
3205 #undef __FUNCT__
3206 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3207 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3208 {
3209   Mat_MPIAIJ     *b;
3210   PetscErrorCode ierr;
3211   PetscInt       i;
3212   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3213 
3214   PetscFunctionBegin;
3215   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3216   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3217   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3218   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3219   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3220   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3221 
3222   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3223   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3224   if (d_nnz) {
3225     for (i=0; i<B->rmap->n; i++) {
3226       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3227     }
3228   }
3229   if (o_nnz) {
3230     for (i=0; i<B->rmap->n; i++) {
3231       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3232     }
3233   }
3234   b = (Mat_MPIAIJ*)B->data;
3235 
3236   if (!B->preallocated) {
3237     /* Explicitly create 2 MATSEQAIJ matrices. */
3238     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3239     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3240     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3241     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3242     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3243     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3244     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3245     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3246     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3247     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3248   }
3249 
3250   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3251   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3252   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3253   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3254   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3255   B->preallocated = PETSC_TRUE;
3256   PetscFunctionReturn(0);
3257 }
3258 EXTERN_C_END
3259 
3260 #undef __FUNCT__
3261 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3262 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3263 {
3264   Mat            mat;
3265   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3266   PetscErrorCode ierr;
3267 
3268   PetscFunctionBegin;
3269   *newmat       = 0;
3270   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3271   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3272   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3273   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3274   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3275   a    = (Mat_MPIAIJ*)mat->data;
3276 
3277   mat->factortype    = matin->factortype;
3278   mat->rmap->bs      = matin->rmap->bs;
3279   mat->cmap->bs      = matin->cmap->bs;
3280   mat->assembled    = PETSC_TRUE;
3281   mat->insertmode   = NOT_SET_VALUES;
3282   mat->preallocated = PETSC_TRUE;
3283 
3284   a->size           = oldmat->size;
3285   a->rank           = oldmat->rank;
3286   a->donotstash     = oldmat->donotstash;
3287   a->roworiented    = oldmat->roworiented;
3288   a->rowindices     = 0;
3289   a->rowvalues      = 0;
3290   a->getrowactive   = PETSC_FALSE;
3291 
3292   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3293   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3294 
3295   if (oldmat->colmap) {
3296 #if defined (PETSC_USE_CTABLE)
3297     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3298 #else
3299     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3300     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3301     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3302 #endif
3303   } else a->colmap = 0;
3304   if (oldmat->garray) {
3305     PetscInt len;
3306     len  = oldmat->B->cmap->n;
3307     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3308     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3309     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3310   } else a->garray = 0;
3311 
3312   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3313   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3314   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3315   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3316   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3317   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3318   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3319   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3320   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3321   *newmat = mat;
3322   PetscFunctionReturn(0);
3323 }
3324 
3325 
3326 
3327 #undef __FUNCT__
3328 #define __FUNCT__ "MatLoad_MPIAIJ"
3329 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3330 {
3331   PetscScalar    *vals,*svals;
3332   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3333   PetscErrorCode ierr;
3334   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3335   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3336   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3337   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3338   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3339   int            fd;
3340   PetscInt       bs = 1;
3341 
3342   PetscFunctionBegin;
3343   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3344   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3345   if (!rank) {
3346     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3347     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3348     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3349   }
3350 
3351   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3352   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3353   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3354 
3355   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3356 
3357   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3358   M = header[1]; N = header[2];
3359   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3360   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3361   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3362 
3363   /* If global sizes are set, check if they are consistent with that given in the file */
3364   if (sizesset) {
3365     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3366   }
3367   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3368   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3369 
3370   /* determine ownership of all (block) rows */
3371   if( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3372   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3373   else m = newMat->rmap->n; /* Set by user */
3374 
3375   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3376   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3377 
3378   /* First process needs enough room for process with most rows */
3379   if (!rank) {
3380     mmax       = rowners[1];
3381     for (i=2; i<size; i++) {
3382       mmax = PetscMax(mmax,rowners[i]);
3383     }
3384   } else mmax = m;
3385 
3386   rowners[0] = 0;
3387   for (i=2; i<=size; i++) {
3388     rowners[i] += rowners[i-1];
3389   }
3390   rstart = rowners[rank];
3391   rend   = rowners[rank+1];
3392 
3393   /* distribute row lengths to all processors */
3394   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3395   if (!rank) {
3396     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3397     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3398     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3399     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3400     for (j=0; j<m; j++) {
3401       procsnz[0] += ourlens[j];
3402     }
3403     for (i=1; i<size; i++) {
3404       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3405       /* calculate the number of nonzeros on each processor */
3406       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3407         procsnz[i] += rowlengths[j];
3408       }
3409       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3410     }
3411     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3412   } else {
3413     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3414   }
3415 
3416   if (!rank) {
3417     /* determine max buffer needed and allocate it */
3418     maxnz = 0;
3419     for (i=0; i<size; i++) {
3420       maxnz = PetscMax(maxnz,procsnz[i]);
3421     }
3422     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3423 
3424     /* read in my part of the matrix column indices  */
3425     nz   = procsnz[0];
3426     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3427     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3428 
3429     /* read in everyone else's and ship off */
3430     for (i=1; i<size; i++) {
3431       nz     = procsnz[i];
3432       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3433       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3434     }
3435     ierr = PetscFree(cols);CHKERRQ(ierr);
3436   } else {
3437     /* determine buffer space needed for message */
3438     nz = 0;
3439     for (i=0; i<m; i++) {
3440       nz += ourlens[i];
3441     }
3442     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3443 
3444     /* receive message of column indices*/
3445     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3446   }
3447 
3448   /* determine column ownership if matrix is not square */
3449   if (N != M) {
3450     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3451     else n = newMat->cmap->n;
3452     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3453     cstart = cend - n;
3454   } else {
3455     cstart = rstart;
3456     cend   = rend;
3457     n      = cend - cstart;
3458   }
3459 
3460   /* loop over local rows, determining number of off diagonal entries */
3461   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3462   jj = 0;
3463   for (i=0; i<m; i++) {
3464     for (j=0; j<ourlens[i]; j++) {
3465       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3466       jj++;
3467     }
3468   }
3469 
3470   for (i=0; i<m; i++) {
3471     ourlens[i] -= offlens[i];
3472   }
3473   if (!sizesset) {
3474     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3475   }
3476 
3477   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3478 
3479   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3480 
3481   for (i=0; i<m; i++) {
3482     ourlens[i] += offlens[i];
3483   }
3484 
3485   if (!rank) {
3486     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3487 
3488     /* read in my part of the matrix numerical values  */
3489     nz   = procsnz[0];
3490     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3491 
3492     /* insert into matrix */
3493     jj      = rstart;
3494     smycols = mycols;
3495     svals   = vals;
3496     for (i=0; i<m; i++) {
3497       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3498       smycols += ourlens[i];
3499       svals   += ourlens[i];
3500       jj++;
3501     }
3502 
3503     /* read in other processors and ship out */
3504     for (i=1; i<size; i++) {
3505       nz     = procsnz[i];
3506       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3507       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3508     }
3509     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3510   } else {
3511     /* receive numeric values */
3512     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3513 
3514     /* receive message of values*/
3515     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3516 
3517     /* insert into matrix */
3518     jj      = rstart;
3519     smycols = mycols;
3520     svals   = vals;
3521     for (i=0; i<m; i++) {
3522       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3523       smycols += ourlens[i];
3524       svals   += ourlens[i];
3525       jj++;
3526     }
3527   }
3528   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3529   ierr = PetscFree(vals);CHKERRQ(ierr);
3530   ierr = PetscFree(mycols);CHKERRQ(ierr);
3531   ierr = PetscFree(rowners);CHKERRQ(ierr);
3532   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3533   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3534 
3535   PetscFunctionReturn(0);
3536 }
3537 
3538 #undef __FUNCT__
3539 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3540 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3541 {
3542   PetscErrorCode ierr;
3543   IS             iscol_local;
3544   PetscInt       csize;
3545 
3546   PetscFunctionBegin;
3547   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3548   if (call == MAT_REUSE_MATRIX) {
3549     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3550     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3551   } else {
3552     PetscInt cbs;
3553     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3554     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3555     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3556   }
3557   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3558   if (call == MAT_INITIAL_MATRIX) {
3559     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3560     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3561   }
3562   PetscFunctionReturn(0);
3563 }
3564 
3565 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3566 #undef __FUNCT__
3567 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3568 /*
3569     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3570   copy, and then the final result obtained by concatenating the local matrices.
3571   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3572 
3573   Note: This requires a sequential iscol with all indices.
3574 */
3575 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3576 {
3577   PetscErrorCode ierr;
3578   PetscMPIInt    rank,size;
3579   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3580   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3581   PetscBool      allcolumns, colflag;
3582   Mat            M,Mreuse;
3583   MatScalar      *vwork,*aa;
3584   MPI_Comm       comm = ((PetscObject)mat)->comm;
3585   Mat_SeqAIJ     *aij;
3586 
3587 
3588   PetscFunctionBegin;
3589   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3590   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3591 
3592   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3593   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3594   if (colflag && ncol == mat->cmap->N){
3595     allcolumns = PETSC_TRUE;
3596   } else {
3597     allcolumns = PETSC_FALSE;
3598   }
3599   if (call ==  MAT_REUSE_MATRIX) {
3600     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3601     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3602     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3603   } else {
3604     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3605   }
3606 
3607   /*
3608       m - number of local rows
3609       n - number of columns (same on all processors)
3610       rstart - first row in new global matrix generated
3611   */
3612   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3613   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3614   if (call == MAT_INITIAL_MATRIX) {
3615     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3616     ii  = aij->i;
3617     jj  = aij->j;
3618 
3619     /*
3620         Determine the number of non-zeros in the diagonal and off-diagonal
3621         portions of the matrix in order to do correct preallocation
3622     */
3623 
3624     /* first get start and end of "diagonal" columns */
3625     if (csize == PETSC_DECIDE) {
3626       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3627       if (mglobal == n) { /* square matrix */
3628         nlocal = m;
3629       } else {
3630         nlocal = n/size + ((n % size) > rank);
3631       }
3632     } else {
3633       nlocal = csize;
3634     }
3635     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3636     rstart = rend - nlocal;
3637     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3638 
3639     /* next, compute all the lengths */
3640     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3641     olens = dlens + m;
3642     for (i=0; i<m; i++) {
3643       jend = ii[i+1] - ii[i];
3644       olen = 0;
3645       dlen = 0;
3646       for (j=0; j<jend; j++) {
3647         if (*jj < rstart || *jj >= rend) olen++;
3648         else dlen++;
3649         jj++;
3650       }
3651       olens[i] = olen;
3652       dlens[i] = dlen;
3653     }
3654     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3655     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3656     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3657     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3658     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3659     ierr = PetscFree(dlens);CHKERRQ(ierr);
3660   } else {
3661     PetscInt ml,nl;
3662 
3663     M = *newmat;
3664     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3665     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3666     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3667     /*
3668          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3669        rather than the slower MatSetValues().
3670     */
3671     M->was_assembled = PETSC_TRUE;
3672     M->assembled     = PETSC_FALSE;
3673   }
3674   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3675   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3676   ii  = aij->i;
3677   jj  = aij->j;
3678   aa  = aij->a;
3679   for (i=0; i<m; i++) {
3680     row   = rstart + i;
3681     nz    = ii[i+1] - ii[i];
3682     cwork = jj;     jj += nz;
3683     vwork = aa;     aa += nz;
3684     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3685   }
3686 
3687   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3688   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3689   *newmat = M;
3690 
3691   /* save submatrix used in processor for next request */
3692   if (call ==  MAT_INITIAL_MATRIX) {
3693     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3694     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3695   }
3696 
3697   PetscFunctionReturn(0);
3698 }
3699 
3700 EXTERN_C_BEGIN
3701 #undef __FUNCT__
3702 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3703 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3704 {
3705   PetscInt       m,cstart, cend,j,nnz,i,d;
3706   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3707   const PetscInt *JJ;
3708   PetscScalar    *values;
3709   PetscErrorCode ierr;
3710 
3711   PetscFunctionBegin;
3712   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3713 
3714   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3715   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3716   m      = B->rmap->n;
3717   cstart = B->cmap->rstart;
3718   cend   = B->cmap->rend;
3719   rstart = B->rmap->rstart;
3720 
3721   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3722 
3723 #if defined(PETSC_USE_DEBUG)
3724   for (i=0; i<m; i++) {
3725     nnz     = Ii[i+1]- Ii[i];
3726     JJ      = J + Ii[i];
3727     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3728     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3729     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3730   }
3731 #endif
3732 
3733   for (i=0; i<m; i++) {
3734     nnz     = Ii[i+1]- Ii[i];
3735     JJ      = J + Ii[i];
3736     nnz_max = PetscMax(nnz_max,nnz);
3737     d       = 0;
3738     for (j=0; j<nnz; j++) {
3739       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3740     }
3741     d_nnz[i] = d;
3742     o_nnz[i] = nnz - d;
3743   }
3744   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3745   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3746 
3747   if (v) values = (PetscScalar*)v;
3748   else {
3749     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3750     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3751   }
3752 
3753   for (i=0; i<m; i++) {
3754     ii   = i + rstart;
3755     nnz  = Ii[i+1]- Ii[i];
3756     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3757   }
3758   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3759   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3760 
3761   if (!v) {
3762     ierr = PetscFree(values);CHKERRQ(ierr);
3763   }
3764   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3765   PetscFunctionReturn(0);
3766 }
3767 EXTERN_C_END
3768 
3769 #undef __FUNCT__
3770 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3771 /*@
3772    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3773    (the default parallel PETSc format).
3774 
3775    Collective on MPI_Comm
3776 
3777    Input Parameters:
3778 +  B - the matrix
3779 .  i - the indices into j for the start of each local row (starts with zero)
3780 .  j - the column indices for each local row (starts with zero)
3781 -  v - optional values in the matrix
3782 
3783    Level: developer
3784 
3785    Notes:
3786        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3787      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3788      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3789 
3790        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3791 
3792        The format used for the sparse matrix input is equivalent to a
3793     row-major ordering, i.e. for the following matrix, the expected input data is
3794     as shown:
3795 
3796         1 0 0
3797         2 0 3     P0
3798        -------
3799         4 5 6     P1
3800 
3801      Process0 [P0]: rows_owned=[0,1]
3802         i =  {0,1,3}  [size = nrow+1  = 2+1]
3803         j =  {0,0,2}  [size = nz = 3]
3804         v =  {1,2,3}  [size = nz = 3]
3805 
3806      Process1 [P1]: rows_owned=[2]
3807         i =  {0,3}    [size = nrow+1  = 1+1]
3808         j =  {0,1,2}  [size = nz = 3]
3809         v =  {4,5,6}  [size = nz = 3]
3810 
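     A minimal per-process call sketch for the example above; the names B and nlocal and the
     i/j/v arrays are illustrative placeholders holding the per-process values listed above,
     not part of this routine's interface:
.vb
     Mat B;
     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,nlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);  /* nlocal = 2 on P0, 1 on P1 */
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
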
3811 .keywords: matrix, aij, compressed row, sparse, parallel
3812 
3813 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3814           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3815 @*/
3816 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3817 {
3818   PetscErrorCode ierr;
3819 
3820   PetscFunctionBegin;
3821   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3822   PetscFunctionReturn(0);
3823 }
3824 
3825 #undef __FUNCT__
3826 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3827 /*@C
3828    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3829    (the default parallel PETSc format).  For good matrix assembly performance
3830    the user should preallocate the matrix storage by setting the parameters
3831    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3832    performance can be increased by more than a factor of 50.
3833 
3834    Collective on MPI_Comm
3835 
3836    Input Parameters:
3837 +  A - the matrix
3838 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3839            (same value is used for all local rows)
3840 .  d_nnz - array containing the number of nonzeros in the various rows of the
3841            DIAGONAL portion of the local submatrix (possibly different for each row)
3842            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3843            The size of this array is equal to the number of local rows, i.e 'm'.
3844            For matrices that will be factored, you must leave room for (and set)
3845            the diagonal entry even if it is zero.
3846 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3847            submatrix (same value is used for all local rows).
3848 -  o_nnz - array containing the number of nonzeros in the various rows of the
3849            OFF-DIAGONAL portion of the local submatrix (possibly different for
3850            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3851            structure. The size of this array is equal to the number
3852            of local rows, i.e 'm'.
3853 
3854    If the *_nnz parameter is given then the *_nz parameter is ignored
3855 
3856    The AIJ format (also called the Yale sparse matrix format or
3857    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3858    storage.  The stored row and column indices begin with zero.
3859    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3860 
3861    The parallel matrix is partitioned such that the first m0 rows belong to
3862    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3863    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
3864 
3865    The DIAGONAL portion of the local submatrix of a processor can be defined
3866    as the submatrix obtained by extracting the part corresponding to
3867    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3868    first row that belongs to the processor, r2 is the last row belonging to
3869    this processor, and c1-c2 is the range of indices of the local part of a
3870    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3871    common case of a square matrix, the row and column ranges are the same and
3872    the DIAGONAL part is also square. The remaining portion of the local
3873    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3874 
3875    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3876 
3877    You can call MatGetInfo() to get information on how effective the preallocation was;
3878    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3879    You can also run with the option -info and look for messages with the string
3880    malloc in them to see if additional memory allocation was needed.
3881 
3882    Example usage:
3883 
3884    Consider the following 8x8 matrix with 34 non-zero values that is
3885    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3886    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3887    as follows:
3888 
3889 .vb
3890             1  2  0  |  0  3  0  |  0  4
3891     Proc0   0  5  6  |  7  0  0  |  8  0
3892             9  0 10  | 11  0  0  | 12  0
3893     -------------------------------------
3894            13  0 14  | 15 16 17  |  0  0
3895     Proc1   0 18  0  | 19 20 21  |  0  0
3896             0  0  0  | 22 23  0  | 24  0
3897     -------------------------------------
3898     Proc2  25 26 27  |  0  0 28  | 29  0
3899            30  0  0  | 31 32 33  |  0 34
3900 .ve
3901 
3902    This can be represented as a collection of submatrices as:
3903 
3904 .vb
3905       A B C
3906       D E F
3907       G H I
3908 .ve
3909 
3910    Where the submatrices A,B,C are owned by proc0, D,E,F are
3911    owned by proc1, G,H,I are owned by proc2.
3912 
3913    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3914    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3915    The 'M','N' parameters are 8,8, and have the same values on all procs.
3916 
3917    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3918    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3919    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3920    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3921    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3922    matrix, and [DF] as another SeqAIJ matrix.
3923 
3924    When d_nz, o_nz parameters are specified, d_nz storage elements are
3925    allocated for every row of the local diagonal submatrix, and o_nz
3926    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3927    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3928    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3929    In this case, the values of d_nz,o_nz are:
3930 .vb
3931      proc0 : dnz = 2, o_nz = 2
3932      proc1 : dnz = 3, o_nz = 2
3933      proc2 : dnz = 1, o_nz = 4
3934 .ve
3935    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3936    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3937    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3938    34 values.
3939 
3940    When d_nnz, o_nnz parameters are specified, the storage is specified
3941    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3942    In the above case the values for d_nnz,o_nnz are:
3943 .vb
3944      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3945      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3946      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3947 .ve
3948    Here the space allocated is the sum of all the above values, i.e. 34, and
3949    hence the preallocation is perfect.
3950 
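   A minimal per-process call sketch matching the example above; A, m, d_nnz, and o_nnz are
   illustrative placeholders holding the per-process values listed above:
.vb
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);   /* m = 3,3,2 on proc0,proc1,proc2 */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);         /* or (A,d_nz,PETSC_NULL,o_nz,PETSC_NULL) */
.ve
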
3951    Level: intermediate
3952 
3953 .keywords: matrix, aij, compressed row, sparse, parallel
3954 
3955 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3956           MPIAIJ, MatGetInfo()
3957 @*/
3958 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3959 {
3960   PetscErrorCode ierr;
3961 
3962   PetscFunctionBegin;
3963   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3964   PetscValidType(B,1);
3965   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3966   PetscFunctionReturn(0);
3967 }
3968 
3969 #undef __FUNCT__
3970 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3971 /*@
3972      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3973          CSR format the local rows.
3974 
3975    Collective on MPI_Comm
3976 
3977    Input Parameters:
3978 +  comm - MPI communicator
3979 .  m - number of local rows (Cannot be PETSC_DECIDE)
3980 .  n - This value should be the same as the local size used in creating the
3981        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3982        calculated if N is given) For square matrices n is almost always m.
3983 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3984 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3985 .   i - row indices
3986 .   j - column indices
3987 -   a - matrix values
3988 
3989    Output Parameter:
3990 .   mat - the matrix
3991 
3992    Level: intermediate
3993 
3994    Notes:
3995        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3996      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3997      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3998 
3999        The i and j indices are 0 based, and the entries of i are offsets into the local j (and a) arrays.
4000 
4001        The format used for the sparse matrix input is equivalent to a
4002     row-major ordering, i.e., for the following matrix, the input data expected is
4003     as shown:
4004 
4005         1 0 0
4006         2 0 3     P0
4007        -------
4008         4 5 6     P1
4009 
4010      Process0 [P0]: rows_owned=[0,1]
4011         i =  {0,1,3}  [size = nrow+1  = 2+1]
4012         j =  {0,0,2}  [size = nz = 3]
4013         v =  {1,2,3}  [size = nz = 3]
4014 
4015      Process1 [P1]: rows_owned=[2]
4016         i =  {0,3}    [size = nrow+1  = 1+1]
4017         j =  {0,1,2}  [size = nz = 3]
4018         v =  {4,5,6}  [size = nz = 3]
4019 
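     A sketch of the corresponding call on process 0 for the matrix above (process 1
     would pass m=1 and its own i, j, v arrays; the variable names are illustrative):
.vb
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
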
4020 .keywords: matrix, aij, compressed row, sparse, parallel
4021 
4022 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4023           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4024 @*/
4025 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4026 {
4027   PetscErrorCode ierr;
4028 
4029   PetscFunctionBegin;
4030   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4031   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4032   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4033   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4034   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4035   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4036   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4037   PetscFunctionReturn(0);
4038 }
4039 
4040 #undef __FUNCT__
4041 #define __FUNCT__ "MatCreateAIJ"
4042 /*@C
4043    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4044    (the default parallel PETSc format).  For good matrix assembly performance
4045    the user should preallocate the matrix storage by setting the parameters
4046    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4047    performance can be increased by more than a factor of 50.
4048 
4049    Collective on MPI_Comm
4050 
4051    Input Parameters:
4052 +  comm - MPI communicator
4053 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4054            This value should be the same as the local size used in creating the
4055            y vector for the matrix-vector product y = Ax.
4056 .  n - This value should be the same as the local size used in creating the
4057        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4058        calculated if N is given) For square matrices n is almost always m.
4059 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4060 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4061 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4062            (same value is used for all local rows)
4063 .  d_nnz - array containing the number of nonzeros in the various rows of the
4064            DIAGONAL portion of the local submatrix (possibly different for each row)
4065            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4066            The size of this array is equal to the number of local rows, i.e 'm'.
4067 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4068            submatrix (same value is used for all local rows).
4069 -  o_nnz - array containing the number of nonzeros in the various rows of the
4070            OFF-DIAGONAL portion of the local submatrix (possibly different for
4071            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4072            structure. The size of this array is equal to the number
4073            of local rows, i.e 'm'.
4074 
4075    Output Parameter:
4076 .  A - the matrix
4077 
4078    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4079    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4080    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4081 
4082    Notes:
4083    If the *_nnz parameter is given then the *_nz parameter is ignored
4084 
4085    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4086    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4087    storage requirements for this matrix.
4088 
4089    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4090    processor then it must be used on all processors that share the object for
4091    that argument.
4092 
4093    The user MUST specify either the local or global matrix dimensions
4094    (possibly both).
4095 
4096    The parallel matrix is partitioned across processors such that the
4097    first m0 rows belong to process 0, the next m1 rows belong to
4098    process 1, the next m2 rows belong to process 2, etc., where
4099    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4100    values corresponding to an [m x N] submatrix.
4101 
4102    The columns are logically partitioned with the n0 columns belonging
4103    to the 0th partition, the next n1 columns belonging to the next
4104    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4105 
4106    The DIAGONAL portion of the local submatrix on any given processor
4107    is the submatrix corresponding to the rows and columns m,n
4108    corresponding to the given processor, i.e., the diagonal matrix on
4109    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4110    etc. The remaining portion of the local submatrix [m x (N-n)]
4111    constitutes the OFF-DIAGONAL portion. The example below better
4112    illustrates this concept.
4113 
4114    For a square global matrix we define each processor's diagonal portion
4115    to be its local rows and the corresponding columns (a square submatrix);
4116    each processor's off-diagonal portion encompasses the remainder of the
4117    local matrix (a rectangular submatrix).
4118 
4119    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4120 
4121    When calling this routine with a single process communicator, a matrix of
4122    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4123    type of communicator, use the construction mechanism:
4124      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4125 
4126    By default, this format uses inodes (identical nodes) when possible.
4127    We search for consecutive rows with the same nonzero structure, thereby
4128    reusing matrix information to achieve increased efficiency.
4129 
4130    Options Database Keys:
4131 +  -mat_no_inode  - Do not use inodes
4132 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4133 -  -mat_aij_oneindex - Internally use indexing starting at 1
4134         rather than 0.  Note that when calling MatSetValues(),
4135         the user still MUST index entries starting at 0!
4136 
4137 
4138    Example usage:
4139 
4140    Consider the following 8x8 matrix with 34 nonzero values that is
4141    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4142    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4143    as follows:
4144 
4145 .vb
4146             1  2  0  |  0  3  0  |  0  4
4147     Proc0   0  5  6  |  7  0  0  |  8  0
4148             9  0 10  | 11  0  0  | 12  0
4149     -------------------------------------
4150            13  0 14  | 15 16 17  |  0  0
4151     Proc1   0 18  0  | 19 20 21  |  0  0
4152             0  0  0  | 22 23  0  | 24  0
4153     -------------------------------------
4154     Proc2  25 26 27  |  0  0 28  | 29  0
4155            30  0  0  | 31 32 33  |  0 34
4156 .ve
4157 
4158    This can be represented as a collection of submatrices as:
4159 
4160 .vb
4161       A B C
4162       D E F
4163       G H I
4164 .ve
4165 
4166    Where the submatrices A,B,C are owned by proc0, D,E,F are
4167    owned by proc1, G,H,I are owned by proc2.
4168 
4169    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4170    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4171    The 'M','N' parameters are 8,8, and have the same values on all procs.
4172 
4173    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4174    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4175    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4176    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4177    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4178    matrix, and [DF] as another SeqAIJ matrix.
4179 
4180    When d_nz, o_nz parameters are specified, d_nz storage elements are
4181    allocated for every row of the local diagonal submatrix, and o_nz
4182    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4183    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4184    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4185    In this case, the values of d_nz,o_nz are:
4186 .vb
4187      proc0 : d_nz = 2, o_nz = 2
4188      proc1 : d_nz = 3, o_nz = 2
4189      proc2 : d_nz = 1, o_nz = 4
4190 .ve
4191    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4192    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4193    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4194    34 values.
4195 
4196    When d_nnz, o_nnz parameters are specified, the storage is specified
4197    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4198    In the above case the values for d_nnz,o_nnz are:
4199 .vb
4200      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4201      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4202      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4203 .ve
4204    Here the space allocated is the sum of all the above values, i.e., 34, and
4205    hence the preallocation is perfect.
4206 
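   A sketch of the corresponding call on proc0 for the example above (proc1 and proc2
   would pass their own m, n, d_nnz and o_nnz arrays):
.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     Mat      A;
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
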
4207    Level: intermediate
4208 
4209 .keywords: matrix, aij, compressed row, sparse, parallel
4210 
4211 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4212           MPIAIJ, MatCreateMPIAIJWithArrays()
4213 @*/
4214 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4215 {
4216   PetscErrorCode ierr;
4217   PetscMPIInt    size;
4218 
4219   PetscFunctionBegin;
4220   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4221   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4222   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4223   if (size > 1) {
4224     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4225     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4226   } else {
4227     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4228     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4229   }
4230   PetscFunctionReturn(0);
4231 }
4232 
4233 #undef __FUNCT__
4234 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
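/*
   MatMPIAIJGetSeqAIJ - returns the diagonal block Ad and the off-diagonal block Ao of an
   MPIAIJ matrix as SeqAIJ matrices, along with colmap, the array of global column indices
   of the (compressed) local columns of Ao.
*/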
4235 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4236 {
4237   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4238 
4239   PetscFunctionBegin;
4240   *Ad     = a->A;
4241   *Ao     = a->B;
4242   *colmap = a->garray;
4243   PetscFunctionReturn(0);
4244 }
4245 
4246 #undef __FUNCT__
4247 #define __FUNCT__ "MatSetColoring_MPIAIJ"
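/*
   MatSetColoring_MPIAIJ - applies a coloring to an MPIAIJ matrix by setting it on the
   diagonal and off-diagonal SeqAIJ blocks; the colors are remapped to each block's local
   column space.
*/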
4248 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4249 {
4250   PetscErrorCode ierr;
4251   PetscInt       i;
4252   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4253 
4254   PetscFunctionBegin;
4255   if (coloring->ctype == IS_COLORING_GLOBAL) {
4256     ISColoringValue *allcolors,*colors;
4257     ISColoring      ocoloring;
4258 
4259     /* set coloring for diagonal portion */
4260     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4261 
4262     /* set coloring for off-diagonal portion */
4263     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4264     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4265     for (i=0; i<a->B->cmap->n; i++) {
4266       colors[i] = allcolors[a->garray[i]];
4267     }
4268     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4269     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4270     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4271     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4272   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4273     ISColoringValue *colors;
4274     PetscInt        *larray;
4275     ISColoring      ocoloring;
4276 
4277     /* set coloring for diagonal portion */
4278     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4279     for (i=0; i<a->A->cmap->n; i++) {
4280       larray[i] = i + A->cmap->rstart;
4281     }
4282     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4283     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4284     for (i=0; i<a->A->cmap->n; i++) {
4285       colors[i] = coloring->colors[larray[i]];
4286     }
4287     ierr = PetscFree(larray);CHKERRQ(ierr);
4288     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4289     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4290     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4291 
4292     /* set coloring for off-diagonal portion */
4293     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4294     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4295     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4296     for (i=0; i<a->B->cmap->n; i++) {
4297       colors[i] = coloring->colors[larray[i]];
4298     }
4299     ierr = PetscFree(larray);CHKERRQ(ierr);
4300     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4301     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4302     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4303   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4304 
4305   PetscFunctionReturn(0);
4306 }
4307 
4308 #if defined(PETSC_HAVE_ADIC)
4309 #undef __FUNCT__
4310 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4311 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4312 {
4313   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4314   PetscErrorCode ierr;
4315 
4316   PetscFunctionBegin;
4317   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4318   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4319   PetscFunctionReturn(0);
4320 }
4321 #endif
4322 
4323 #undef __FUNCT__
4324 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4325 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4326 {
4327   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4328   PetscErrorCode ierr;
4329 
4330   PetscFunctionBegin;
4331   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4332   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4333   PetscFunctionReturn(0);
4334 }
4335 
4336 #undef __FUNCT__
4337 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4338 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4339 {
4340   PetscErrorCode ierr;
4341   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4342   PetscInt       *indx;
4343 
4344   PetscFunctionBegin;
4345   /* This routine will ONLY return MPIAIJ type matrix */
4346   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4347   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4348   if (n == PETSC_DECIDE){
4349     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4350   }
4351   /* Check sum(n) = N */
4352   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4353   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4354 
4355   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4356   rstart -= m;
4357 
4358   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4359   for (i=0;i<m;i++) {
4360     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4361     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4362     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4363   }
4364 
4365   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4366   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4367   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4368   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4369   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4370   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4371   PetscFunctionReturn(0);
4372 }
4373 
4374 #undef __FUNCT__
4375 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4376 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4377 {
4378   PetscErrorCode ierr;
4379   PetscInt       m,N,i,rstart,nnz,Ii;
4380   PetscInt       *indx;
4381   PetscScalar    *values;
4382 
4383   PetscFunctionBegin;
4384   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4385   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4386   for (i=0;i<m;i++) {
4387     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4388     Ii    = i + rstart;
4389     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4390     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4391   }
4392   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4393   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4394   PetscFunctionReturn(0);
4395 }
4396 
4397 #undef __FUNCT__
4398 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4399 /*@
4400       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4401                  matrices from each processor
4402 
4403     Collective on MPI_Comm
4404 
4405    Input Parameters:
4406 +    comm - the communicator the parallel matrix will live on
4407 .    inmat - the input sequential matrix (one per process)
4408 .    n - number of local columns (or PETSC_DECIDE)
4409 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4410 
4411    Output Parameter:
4412 .    outmat - the parallel matrix generated
4413 
4414     Level: advanced
4415 
4416    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4417 
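   A usage sketch (inmat is the SeqAIJ matrix assembled on each process; the names are
   illustrative):
.vb
     Mat C;
     MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve
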
4418 @*/
4419 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4420 {
4421   PetscErrorCode ierr;
4422 
4423   PetscFunctionBegin;
4424   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4425   if (scall == MAT_INITIAL_MATRIX){
4426     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4427   }
4428   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4429   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4430   PetscFunctionReturn(0);
4431 }
4432 
4433 #undef __FUNCT__
4434 #define __FUNCT__ "MatFileSplit"
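/*
   MatFileSplit - writes the local rows of A, as a sequential matrix with the full global
   number of columns, to a separate binary file named <outfile>.<rank> on each process.
*/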
4435 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4436 {
4437   PetscErrorCode    ierr;
4438   PetscMPIInt       rank;
4439   PetscInt          m,N,i,rstart,nnz;
4440   size_t            len;
4441   const PetscInt    *indx;
4442   PetscViewer       out;
4443   char              *name;
4444   Mat               B;
4445   const PetscScalar *values;
4446 
4447   PetscFunctionBegin;
4448   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4449   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4450   /* Should this be the type of the diagonal block of A? */
4451   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4452   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4453   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4454   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4455   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4456   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4457   for (i=0;i<m;i++) {
4458     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4459     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4460     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4461   }
4462   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4463   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4464 
4465   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4466   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4467   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4468   sprintf(name,"%s.%d",outfile,rank);
4469   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4470   ierr = PetscFree(name);CHKERRQ(ierr);
4471   ierr = MatView(B,out);CHKERRQ(ierr);
4472   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4473   ierr = MatDestroy(&B);CHKERRQ(ierr);
4474   PetscFunctionReturn(0);
4475 }
4476 
4477 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4478 #undef __FUNCT__
4479 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
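/*
   MatDestroy_MPIAIJ_SeqsToMPI - frees the Mat_Merge_SeqsToMPI support structure attached by
   MatCreateMPIAIJSumSeqAIJSymbolic(), then calls the standard MPIAIJ destroy routine.
*/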
4480 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4481 {
4482   PetscErrorCode       ierr;
4483   Mat_Merge_SeqsToMPI  *merge;
4484   PetscContainer       container;
4485 
4486   PetscFunctionBegin;
4487   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4488   if (container) {
4489     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4490     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4491     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4492     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4493     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4494     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4495     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4496     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4497     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4498     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4499     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4500     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4501     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4502     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4503     ierr = PetscFree(merge);CHKERRQ(ierr);
4504     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4505   }
4506   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4507   PetscFunctionReturn(0);
4508 }
4509 
4510 #include <../src/mat/utils/freespace.h>
4511 #include <petscbt.h>
4512 
4513 #undef __FUNCT__
4514 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4515 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4516 {
4517   PetscErrorCode       ierr;
4518   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4519   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4520   PetscMPIInt          size,rank,taga,*len_s;
4521   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4522   PetscInt             proc,m;
4523   PetscInt             **buf_ri,**buf_rj;
4524   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4525   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4526   MPI_Request          *s_waits,*r_waits;
4527   MPI_Status           *status;
4528   MatScalar            *aa=a->a;
4529   MatScalar            **abuf_r,*ba_i;
4530   Mat_Merge_SeqsToMPI  *merge;
4531   PetscContainer       container;
4532 
4533   PetscFunctionBegin;
4534   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4535 
4536   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4537   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4538 
4539   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4540   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4541 
4542   bi     = merge->bi;
4543   bj     = merge->bj;
4544   buf_ri = merge->buf_ri;
4545   buf_rj = merge->buf_rj;
4546 
4547   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4548   owners = merge->rowmap->range;
4549   len_s  = merge->len_s;
4550 
4551   /* send and recv matrix values */
4552   /*-----------------------------*/
4553   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4554   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4555 
4556   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4557   for (proc=0,k=0; proc<size; proc++){
4558     if (!len_s[proc]) continue;
4559     i = owners[proc];
4560     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4561     k++;
4562   }
4563 
4564   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4565   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4566   ierr = PetscFree(status);CHKERRQ(ierr);
4567 
4568   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4569   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4570 
4571   /* insert mat values of mpimat */
4572   /*----------------------------*/
4573   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4574   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4575 
4576   for (k=0; k<merge->nrecv; k++){
4577     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4578     nrows = *(buf_ri_k[k]);
4579     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4580     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4581   }
4582 
4583   /* set values of ba */
4584   m = merge->rowmap->n;
4585   for (i=0; i<m; i++) {
4586     arow = owners[rank] + i;
4587     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4588     bnzi = bi[i+1] - bi[i];
4589     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4590 
4591     /* add local non-zero vals of this proc's seqmat into ba */
4592     anzi = ai[arow+1] - ai[arow];
4593     aj   = a->j + ai[arow];
4594     aa   = a->a + ai[arow];
4595     nextaj = 0;
4596     for (j=0; nextaj<anzi; j++){
4597       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4598         ba_i[j] += aa[nextaj++];
4599       }
4600     }
4601 
4602     /* add received vals into ba */
4603     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4604       /* i-th row */
4605       if (i == *nextrow[k]) {
4606         anzi = *(nextai[k]+1) - *nextai[k];
4607         aj   = buf_rj[k] + *(nextai[k]);
4608         aa   = abuf_r[k] + *(nextai[k]);
4609         nextaj = 0;
4610         for (j=0; nextaj<anzi; j++){
4611           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4612             ba_i[j] += aa[nextaj++];
4613           }
4614         }
4615         nextrow[k]++; nextai[k]++;
4616       }
4617     }
4618     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4619   }
4620   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4621   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4622 
4623   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4624   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4625   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4626   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4627   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4628   PetscFunctionReturn(0);
4629 }
4630 
4631 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4632 
4633 #undef __FUNCT__
4634 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4635 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4636 {
4637   PetscErrorCode       ierr;
4638   Mat                  B_mpi;
4639   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4640   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4641   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4642   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4643   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4644   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4645   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4646   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4647   MPI_Status           *status;
4648   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4649   PetscBT              lnkbt;
4650   Mat_Merge_SeqsToMPI  *merge;
4651   PetscContainer       container;
4652 
4653   PetscFunctionBegin;
4654   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4655 
4656   /* make sure it is a PETSc comm */
4657   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4658   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4659   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4660 
4661   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4662   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4663 
4664   /* determine row ownership */
4665   /*---------------------------------------------------------*/
4666   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4667   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4668   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4669   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4670   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4671   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4672   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4673 
4674   m      = merge->rowmap->n;
4675   M      = merge->rowmap->N;
4676   owners = merge->rowmap->range;
4677 
4678   /* determine the number of messages to send, their lengths */
4679   /*---------------------------------------------------------*/
4680   len_s  = merge->len_s;
4681 
4682   len = 0;  /* length of buf_si[] */
4683   merge->nsend = 0;
4684   for (proc=0; proc<size; proc++){
4685     len_si[proc] = 0;
4686     if (proc == rank){
4687       len_s[proc] = 0;
4688     } else {
4689       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4690       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros (j entries) to be sent to [proc] */
4691     }
4692     if (len_s[proc]) {
4693       merge->nsend++;
4694       nrows = 0;
4695       for (i=owners[proc]; i<owners[proc+1]; i++){
4696         if (ai[i+1] > ai[i]) nrows++;
4697       }
4698       len_si[proc] = 2*(nrows+1);
4699       len += len_si[proc];
4700     }
4701   }
4702 
4703   /* determine the number and length of messages to receive for ij-structure */
4704   /*-------------------------------------------------------------------------*/
4705   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4706   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4707 
4708   /* post the Irecv of j-structure */
4709   /*-------------------------------*/
4710   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4711   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4712 
4713   /* post the Isend of j-structure */
4714   /*--------------------------------*/
4715   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4716 
4717   for (proc=0, k=0; proc<size; proc++){
4718     if (!len_s[proc]) continue;
4719     i = owners[proc];
4720     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4721     k++;
4722   }
4723 
4724   /* receives and sends of j-structure are complete */
4725   /*------------------------------------------------*/
4726   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4727   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4728 
4729   /* send and recv i-structure */
4730   /*---------------------------*/
4731   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4732   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4733 
4734   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4735   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4736   for (proc=0,k=0; proc<size; proc++){
4737     if (!len_s[proc]) continue;
4738     /* form outgoing message for i-structure:
4739          buf_si[0]:                 nrows to be sent
4740                [1:nrows]:           row index (global)
4741                [nrows+1:2*nrows+1]: i-structure index
4742     */
4743     /*-------------------------------------------*/
4744     nrows = len_si[proc]/2 - 1;
4745     buf_si_i    = buf_si + nrows+1;
4746     buf_si[0]   = nrows;
4747     buf_si_i[0] = 0;
4748     nrows = 0;
4749     for (i=owners[proc]; i<owners[proc+1]; i++){
4750       anzi = ai[i+1] - ai[i];
4751       if (anzi) {
4752         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4753         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4754         nrows++;
4755       }
4756     }
4757     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4758     k++;
4759     buf_si += len_si[proc];
4760   }
4761 
4762   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4763   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4764 
4765   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4766   for (i=0; i<merge->nrecv; i++){
4767     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4768   }
4769 
4770   ierr = PetscFree(len_si);CHKERRQ(ierr);
4771   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4772   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4773   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4774   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4775   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4776   ierr = PetscFree(status);CHKERRQ(ierr);
4777 
4778   /* compute a local seq matrix in each processor */
4779   /*----------------------------------------------*/
4780   /* allocate bi array and free space for accumulating nonzero column info */
4781   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4782   bi[0] = 0;
4783 
4784   /* create and initialize a linked list */
4785   nlnk = N+1;
4786   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4787 
4788   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4790   len  = ai[owners[rank+1]] - ai[owners[rank]];
4791   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4792   current_space = free_space;
4793 
4794   /* determine symbolic info for each local row */
4795   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4796 
4797   for (k=0; k<merge->nrecv; k++){
4798     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4799     nrows = *buf_ri_k[k];
4800     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4801     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4802   }
4803 
4804   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4805   len = 0;
4806   for (i=0;i<m;i++) {
4807     bnzi   = 0;
4808     /* add local non-zero cols of this proc's seqmat into lnk */
4809     arow   = owners[rank] + i;
4810     anzi   = ai[arow+1] - ai[arow];
4811     aj     = a->j + ai[arow];
4812     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4813     bnzi += nlnk;
4814     /* add received col data into lnk */
4815     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4816       if (i == *nextrow[k]) { /* i-th row */
4817         anzi = *(nextai[k]+1) - *nextai[k];
4818         aj   = buf_rj[k] + *nextai[k];
4819         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4820         bnzi += nlnk;
4821         nextrow[k]++; nextai[k]++;
4822       }
4823     }
4824     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4825 
4826     /* if free space is not available, make more free space */
4827     if (current_space->local_remaining<bnzi) {
4828       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4829       nspacedouble++;
4830     }
4831     /* copy data into free space, then initialize lnk */
4832     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4833     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4834 
4835     current_space->array           += bnzi;
4836     current_space->local_used      += bnzi;
4837     current_space->local_remaining -= bnzi;
4838 
4839     bi[i+1] = bi[i] + bnzi;
4840   }
4841 
4842   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4843 
4844   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4845   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4846   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4847 
4848   /* create symbolic parallel matrix B_mpi */
4849   /*---------------------------------------*/
4850   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4851   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4852   if (n==PETSC_DECIDE) {
4853     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4854   } else {
4855     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4856   }
4857   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4858   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4859   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4860   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4861   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4862 
4863   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4864   B_mpi->assembled     = PETSC_FALSE;
4865   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4866   merge->bi            = bi;
4867   merge->bj            = bj;
4868   merge->buf_ri        = buf_ri;
4869   merge->buf_rj        = buf_rj;
4870   merge->coi           = PETSC_NULL;
4871   merge->coj           = PETSC_NULL;
4872   merge->owners_co     = PETSC_NULL;
4873 
4874   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4875 
4876   /* attach the supporting struct to B_mpi for reuse */
4877   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4878   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4879   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4880   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4881   *mpimat = B_mpi;
4882 
4883   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4884   PetscFunctionReturn(0);
4885 }
4886 
4887 #undef __FUNCT__
4888 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4889 /*@C
4890       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4891                  matrices from each processor
4892 
4893     Collective on MPI_Comm
4894 
4895    Input Parameters:
4896 +    comm - the communicator the parallel matrix will live on
4897 .    seqmat - the input sequential matrix (one per process)
4898 .    m - number of local rows (or PETSC_DECIDE)
4899 .    n - number of local columns (or PETSC_DECIDE)
4900 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4901 
4902    Output Parameter:
4903 .    mpimat - the parallel matrix generated
4904 
4905     Level: advanced
4906 
4907    Notes:
4908      The dimensions of the sequential matrix in each processor MUST be the same.
4909      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4910      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
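
     A usage sketch (seqmat is the local sequential matrix each process contributes; the
     second call reuses the symbolic structure when only the numerical values of seqmat
     have changed; the names are illustrative):
.vb
     Mat C;
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve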
4911 @*/
4912 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4913 {
4914   PetscErrorCode   ierr;
4915   PetscMPIInt     size;
4916 
4917   PetscFunctionBegin;
4918   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4919   if (size == 1){
4920      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4921      if (scall == MAT_INITIAL_MATRIX){
4922        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4923      } else {
4924        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4925      }
4926      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4927      PetscFunctionReturn(0);
4928   }
4929   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4930   if (scall == MAT_INITIAL_MATRIX){
4931     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4932   }
4933   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4934   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4935   PetscFunctionReturn(0);
4936 }
4937 
4938 #undef __FUNCT__
4939 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4940 /*@
4941      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4942           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4943           with MatGetSize()
4944 
4945     Not Collective
4946 
4947    Input Parameters:
4948 +    A - the matrix
4949 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4950 
4951    Output Parameter:
4952 .    A_loc - the local sequential matrix generated
4953 
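   A usage sketch (A is an assembled MPIAIJ matrix; the second call reuses A_loc when only
   the numerical values of A have changed):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
.ve
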
4954     Level: developer
4955 
4956 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4957 
4958 @*/
4959 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4960 {
4961   PetscErrorCode  ierr;
4962   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4963   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4964   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4965   MatScalar       *aa=a->a,*ba=b->a,*cam;
4966   PetscScalar     *ca;
4967   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4968   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4969   PetscBool       match;
4970 
4971   PetscFunctionBegin;
4972   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4973   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4974   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4975   if (scall == MAT_INITIAL_MATRIX){
4976     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4977     ci[0] = 0;
4978     for (i=0; i<am; i++){
4979       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4980     }
4981     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4982     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4983     k = 0;
4984     for (i=0; i<am; i++) {
4985       ncols_o = bi[i+1] - bi[i];
4986       ncols_d = ai[i+1] - ai[i];
4987       /* off-diagonal portion of A */
4988       for (jo=0; jo<ncols_o; jo++) {
4989         col = cmap[*bj];
4990         if (col >= cstart) break;
4991         cj[k]   = col; bj++;
4992         ca[k++] = *ba++;
4993       }
4994       /* diagonal portion of A */
4995       for (j=0; j<ncols_d; j++) {
4996         cj[k]   = cstart + *aj++;
4997         ca[k++] = *aa++;
4998       }
4999       /* off-diagonal portion of A */
5000       for (j=jo; j<ncols_o; j++) {
5001         cj[k]   = cmap[*bj++];
5002         ca[k++] = *ba++;
5003       }
5004     }
5005     /* put together the new matrix */
5006     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5007     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5008     /* Since these are PETSc arrays, change flags to free them as necessary. */
5009     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5010     mat->free_a  = PETSC_TRUE;
5011     mat->free_ij = PETSC_TRUE;
5012     mat->nonew   = 0;
5013   } else if (scall == MAT_REUSE_MATRIX){
5014     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5015     ci = mat->i; cj = mat->j; cam = mat->a;
5016     for (i=0; i<am; i++) {
5017       /* off-diagonal portion of A */
5018       ncols_o = bi[i+1] - bi[i];
5019       for (jo=0; jo<ncols_o; jo++) {
5020         col = cmap[*bj];
5021         if (col >= cstart) break;
5022         *cam++ = *ba++; bj++;
5023       }
5024       /* diagonal portion of A */
5025       ncols_d = ai[i+1] - ai[i];
5026       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5027       /* off-diagonal portion of A */
5028       for (j=jo; j<ncols_o; j++) {
5029         *cam++ = *ba++; bj++;
5030       }
5031     }
5032   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5033   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5034   PetscFunctionReturn(0);
5035 }
5036 
5037 #undef __FUNCT__
5038 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5039 /*@C
5040      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5041 
5042     Not Collective
5043 
5044    Input Parameters:
5045 +    A - the matrix
5046 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5047 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5048 
5049    Output Parameter:
5050 .    A_loc - the local sequential matrix generated
5051 
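   A usage sketch (passing PETSC_NULL for row and col lets the routine build the index sets
   of all local rows and all nonzero columns itself):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve
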
5052     Level: developer
5053 
5054 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5055 
5056 @*/
5057 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5058 {
5059   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5060   PetscErrorCode    ierr;
5061   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5062   IS                isrowa,iscola;
5063   Mat               *aloc;
5064   PetscBool       match;
5065 
5066   PetscFunctionBegin;
5067   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5068   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5069   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5070   if (!row){
5071     start = A->rmap->rstart; end = A->rmap->rend;
5072     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5073   } else {
5074     isrowa = *row;
5075   }
5076   if (!col){
5077     start = A->cmap->rstart;
5078     cmap  = a->garray;
5079     nzA   = a->A->cmap->n;
5080     nzB   = a->B->cmap->n;
5081     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5082     ncols = 0;
5083     for (i=0; i<nzB; i++) {
5084       if (cmap[i] < start) idx[ncols++] = cmap[i];
5085       else break;
5086     }
5087     imark = i;
5088     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5089     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5090     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5091   } else {
5092     iscola = *col;
5093   }
5094   if (scall != MAT_INITIAL_MATRIX){
5095     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5096     aloc[0] = *A_loc;
5097   }
5098   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5099   *A_loc = aloc[0];
5100   ierr = PetscFree(aloc);CHKERRQ(ierr);
5101   if (!row){
5102     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5103   }
5104   if (!col){
5105     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5106   }
5107   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5108   PetscFunctionReturn(0);
5109 }
5110 
5111 #undef __FUNCT__
5112 #define __FUNCT__ "MatGetBrowsOfAcols"
5113 /*@C
5114     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5115 
5116     Collective on Mat
5117 
5118    Input Parameters:
5119 +    A,B - the matrices in mpiaij format
5120 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5121 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5122 
5123    Output Parameter:
5124 +    rowb, colb - index sets of rows and columns of B to extract
5125 -    B_seq - the sequential matrix generated
5126 
5127     Level: developer
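   A usage sketch (the index sets returned in rowb and colb by the initial call must be
   passed back unchanged when scall is MAT_REUSE_MATRIX):
.vb
     IS  rowb,colb;
     Mat B_seq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
.ve
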
5128 
5129 @*/
5130 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5131 {
5132   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5133   PetscErrorCode    ierr;
5134   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5135   IS                isrowb,iscolb;
5136   Mat               *bseq=PETSC_NULL;
5137 
5138   PetscFunctionBegin;
5139   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5140     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5141   }
5142   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5143 
5144   if (scall == MAT_INITIAL_MATRIX){
5145     start = A->cmap->rstart;
5146     cmap  = a->garray;
5147     nzA   = a->A->cmap->n;
5148     nzB   = a->B->cmap->n;
5149     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5150     ncols = 0;
5151     for (i=0; i<nzB; i++) {  /* row < local row index */
5152       if (cmap[i] < start) idx[ncols++] = cmap[i];
5153       else break;
5154     }
5155     imark = i;
5156     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5157     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5158     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5159     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5160   } else {
5161     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5162     isrowb = *rowb; iscolb = *colb;
5163     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5164     bseq[0] = *B_seq;
5165   }
5166   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5167   *B_seq = bseq[0];
5168   ierr = PetscFree(bseq);CHKERRQ(ierr);
5169   if (!rowb){
5170     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5171   } else {
5172     *rowb = isrowb;
5173   }
5174   if (!colb){
5175     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5176   } else {
5177     *colb = iscolb;
5178   }
5179   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5180   PetscFunctionReturn(0);
5181 }
5182 
5183 #undef __FUNCT__
5184 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5185 /*
5186     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5187     of the OFF-DIAGONAL portion of local A
5188 
5189     Collective on Mat
5190 
5191    Input Parameters:
5192 +    A,B - the matrices in mpiaij format
5193 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5194 
5195    Output Parameter:
5196 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5197 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5198 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5199 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5200 
5201     Level: developer
5202 
5203 */
5204 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5205 {
5206   VecScatter_MPI_General *gen_to,*gen_from;
5207   PetscErrorCode         ierr;
5208   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5209   Mat_SeqAIJ             *b_oth;
5210   VecScatter             ctx=a->Mvctx;
5211   MPI_Comm               comm=((PetscObject)ctx)->comm;
5212   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5213   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5214   PetscScalar            *rvalues,*svalues;
5215   MatScalar              *b_otha,*bufa,*bufA;
5216   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5217   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5218   MPI_Status             *sstatus,rstatus;
5219   PetscMPIInt            jj;
5220   PetscInt               *cols,sbs,rbs;
5221   PetscScalar            *vals;
5222 
5223   PetscFunctionBegin;
5224   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5225     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5226   }
5227   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5228   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5229 
5230   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5231   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5232   rvalues  = gen_from->values; /* holds the length of receiving row */
5233   svalues  = gen_to->values;   /* holds the length of sending row */
5234   nrecvs   = gen_from->n;
5235   nsends   = gen_to->n;
5236 
5237   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5238   srow     = gen_to->indices;   /* local row index to be sent */
5239   sstarts  = gen_to->starts;
5240   sprocs   = gen_to->procs;
5241   sstatus  = gen_to->sstatus;
5242   sbs      = gen_to->bs;
5243   rstarts  = gen_from->starts;
5244   rprocs   = gen_from->procs;
5245   rbs      = gen_from->bs;
5246 
5247   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5248   if (scall == MAT_INITIAL_MATRIX){
5249     /* i-array */
5250     /*---------*/
5251     /*  post receives */
5252     for (i=0; i<nrecvs; i++){
5253       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5254       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5255       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5256     }
5257 
5258     /* pack the outgoing message */
5259     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5260     sstartsj[0] = 0;  rstartsj[0] = 0;
5261     len = 0; /* total length of j or a array to be sent */
5262     k = 0;
5263     for (i=0; i<nsends; i++){
5264       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5265       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5266       for (j=0; j<nrows; j++) {
5267         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5268         for (l=0; l<sbs; l++){
5269           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5270           rowlen[j*sbs+l] = ncols;
5271           len += ncols;
5272           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5273         }
5274         k++;
5275       }
5276       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5277       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5278     }
5279     /* wait for the recvs and sends of the i-array to complete */
5280     i = nrecvs;
5281     while (i--) {
5282       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5283     }
5284     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5285 
5286     /* allocate buffers for sending j and a arrays */
5287     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5288     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5289 
5290     /* create i-array of B_oth */
5291     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5292     b_othi[0] = 0;
5293     len = 0; /* total length of j or a array to be received */
5294     k = 0;
5295     for (i=0; i<nrecvs; i++){
5296       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5297       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5298       for (j=0; j<nrows; j++) {
5299         b_othi[k+1] = b_othi[k] + rowlen[j];
5300         len += rowlen[j]; k++;
5301       }
5302       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5303     }
5304 
5305     /* allocate space for j and a arrays of B_oth */
5306     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5307     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5308 
5309     /* j-array */
5310     /*---------*/
5311     /*  post receives of j-array */
5312     for (i=0; i<nrecvs; i++){
5313       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5314       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5315     }
5316 
5317     /* pack the outgoing message j-array */
5318     k = 0;
5319     for (i=0; i<nsends; i++){
5320       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5321       bufJ = bufj+sstartsj[i];
5322       for (j=0; j<nrows; j++) {
5323         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5324         for (ll=0; ll<sbs; ll++){
5325           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5326           for (l=0; l<ncols; l++){
5327             *bufJ++ = cols[l];
5328           }
5329           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5330         }
5331       }
5332       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5333     }
5334 
5335     /* wait for the recvs and sends of the j-array to complete */
5336     i = nrecvs;
5337     while (i--) {
5338       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5339     }
5340     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5341   } else if (scall == MAT_REUSE_MATRIX){
5342     sstartsj = *startsj_s;
5343     rstartsj = *startsj_r;
5344     bufa     = *bufa_ptr;
5345     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5346     b_otha   = b_oth->a;
5347   } else {
5348     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatReuse scall must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5349   }
5350 
5351   /* a-array */
5352   /*---------*/
5353   /*  post receives of a-array */
5354   for (i=0; i<nrecvs; i++){
5355     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5356     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5357   }
5358 
5359   /* pack the outgoing message a-array */
5360   k = 0;
5361   for (i=0; i<nsends; i++){
5362     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5363     bufA = bufa+sstartsj[i];
5364     for (j=0; j<nrows; j++) {
5365       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5366       for (ll=0; ll<sbs; ll++){
5367         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5368         for (l=0; l<ncols; l++){
5369           *bufA++ = vals[l];
5370         }
5371         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5372       }
5373     }
5374     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5375   }
5376   /* wait for the recvs and sends of the a-array to complete */
5377   i = nrecvs;
5378   while (i--) {
5379     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5380   }
5381   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5382   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5383 
5384   if (scall == MAT_INITIAL_MATRIX){
5385     /* put together the new matrix */
5386     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5387 
5388     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5389     /* Since these are PETSc arrays, change flags to free them as necessary. */
5390     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5391     b_oth->free_a  = PETSC_TRUE;
5392     b_oth->free_ij = PETSC_TRUE;
5393     b_oth->nonew   = 0;
5394 
5395     ierr = PetscFree(bufj);CHKERRQ(ierr);
5396     if (!startsj_s || !bufa_ptr){
5397       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5398       ierr = PetscFree(bufa);CHKERRQ(ierr);
5399     } else {
5400       *startsj_s = sstartsj;
5401       *startsj_r = rstartsj;
5402       *bufa_ptr  = bufa;
5403     }
5404   }
5405   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5406   PetscFunctionReturn(0);
5407 }
5408 
5409 #undef __FUNCT__
5410 #define __FUNCT__ "MatGetCommunicationStructs"
5411 /*@C
5412   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5413 
5414   Not Collective
5415 
5416   Input Parameter:
5417 . A - The matrix in mpiaij format
5418 
5419   Output Parameters:
5420 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5421 . colmap - A map from global column index to local index into lvec
5422 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5423 
5424   Level: developer
5425 
5426 @*/
5427 #if defined (PETSC_USE_CTABLE)
5428 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5429 #else
5430 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5431 #endif
5432 {
5433   Mat_MPIAIJ *a;
5434 
5435   PetscFunctionBegin;
5436   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5437   PetscValidPointer(lvec, 2);
5438   PetscValidPointer(colmap, 3);
5439   PetscValidPointer(multScatter, 4);
5440   a = (Mat_MPIAIJ *) A->data;
5441   if (lvec) *lvec = a->lvec;
5442   if (colmap) *colmap = a->colmap;
5443   if (multScatter) *multScatter = a->Mvctx;
5444   PetscFunctionReturn(0);
5445 }
5446 
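/*
   A minimal usage sketch (assuming A is an assembled MATMPIAIJ matrix); the returned
   objects are owned by the matrix and must not be destroyed by the caller:

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/
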
5447 EXTERN_C_BEGIN
5448 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5449 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5450 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5451 EXTERN_C_END
5452 
5453 #undef __FUNCT__
5454 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5455 /*
5456     Computes C = A*B as (B'*A')' since computing B*A directly is untenable; B'*A' is an MPIAIJ*MPIDense product, which is supported.
5457 
5458                n                       p                          p
5459         (              )       (              )         (                  )
5460       m (      A       )  *  n (       B      )   =   m (         C        )
5461         (              )       (              )         (                  )
5462 
5463 */
5464 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5465 {
5466   PetscErrorCode     ierr;
5467   Mat                At,Bt,Ct;
5468 
5469   PetscFunctionBegin;
5470   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5471   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5472   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5473   ierr = MatDestroy(&At);CHKERRQ(ierr);
5474   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5475   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5476   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5477   PetscFunctionReturn(0);
5478 }
5479 
5480 #undef __FUNCT__
5481 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5482 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5483 {
5484   PetscErrorCode ierr;
5485   PetscInt       m=A->rmap->n,n=B->cmap->n;
5486   Mat            Cmat;
5487 
5488   PetscFunctionBegin;
5489   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5490   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5491   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5492   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5493   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5494   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5495   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5496   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5497   *C   = Cmat;
5498   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5499   PetscFunctionReturn(0);
5500 }
5501 
5502 /* ----------------------------------------------------------------*/
5503 #undef __FUNCT__
5504 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5505 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5506 {
5507   PetscErrorCode ierr;
5508 
5509   PetscFunctionBegin;
5510   if (scall == MAT_INITIAL_MATRIX){
5511     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5512   }
5513   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5514   PetscFunctionReturn(0);
5515 }
5516 
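/*
   A usage sketch (assuming A is an assembled MATMPIDENSE matrix and B an assembled MATMPIAIJ
   matrix with A->cmap->n == B->rmap->n); the product is returned as an MPIDENSE matrix:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/
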
5517 EXTERN_C_BEGIN
5518 #if defined(PETSC_HAVE_MUMPS)
5519 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5520 #endif
5521 #if defined(PETSC_HAVE_PASTIX)
5522 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5523 #endif
5524 #if defined(PETSC_HAVE_SUPERLU_DIST)
5525 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5526 #endif
5527 #if defined(PETSC_HAVE_SPOOLES)
5528 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5529 #endif
5530 #if defined(PETSC_HAVE_CLIQUE)
5531 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5532 #endif
5533 EXTERN_C_END
5534 
5535 /*MC
5536    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5537 
5538    Options Database Keys:
5539 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5540 
5541   Level: beginner
5542 
5543 .seealso: MatCreateAIJ()
5544 M*/
5545 
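/*
   A minimal creation sketch for a MATMPIAIJ matrix (the preallocation numbers 5 and 2 are
   illustrative placeholders, not recommendations):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ... calls to MatSetValues() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
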
5546 EXTERN_C_BEGIN
5547 #undef __FUNCT__
5548 #define __FUNCT__ "MatCreate_MPIAIJ"
5549 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5550 {
5551   Mat_MPIAIJ     *b;
5552   PetscErrorCode ierr;
5553   PetscMPIInt    size;
5554 
5555   PetscFunctionBegin;
5556   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5557   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5558   B->data         = (void*)b;
5559   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5560   B->assembled    = PETSC_FALSE;
5561   B->insertmode   = NOT_SET_VALUES;
5562   b->size         = size;
5563   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5564 
5565   /* build cache for off array entries formed */
5566   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5567   b->donotstash  = PETSC_FALSE;
5568   b->colmap      = 0;
5569   b->garray      = 0;
5570   b->roworiented = PETSC_TRUE;
5571 
5572   /* stuff used for matrix vector multiply */
5573   b->lvec      = PETSC_NULL;
5574   b->Mvctx     = PETSC_NULL;
5575 
5576   /* stuff for MatGetRow() */
5577   b->rowindices   = 0;
5578   b->rowvalues    = 0;
5579   b->getrowactive = PETSC_FALSE;
5580 
5581   /* flexible pointer used in CUSP/CUSPARSE classes */
5582   b->spptr        = PETSC_NULL;
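
  /* Compose the implementation-specific routines on the object by name so that the generic
     interfaces (MatGetFactor(), MatConvert(), MatMatMult(), MatStoreValues(), ...) can look
     them up at run time for this matrix type. */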
5583 #if defined(PETSC_HAVE_SPOOLES)
5584   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5585                                      "MatGetFactor_mpiaij_spooles",
5586                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5587 #endif
5588 #if defined(PETSC_HAVE_MUMPS)
5589   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5590                                      "MatGetFactor_aij_mumps",
5591                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5592 #endif
5593 #if defined(PETSC_HAVE_PASTIX)
5594   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5595 					   "MatGetFactor_mpiaij_pastix",
5596 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5597 #endif
5598 #if defined(PETSC_HAVE_SUPERLU_DIST)
5599   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5600                                      "MatGetFactor_mpiaij_superlu_dist",
5601                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5602 #endif
5603 #if defined(PETSC_HAVE_CLIQUE)
5604   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5605                                      "MatGetFactor_aij_clique",
5606                                      MatGetFactor_aij_clique);CHKERRQ(ierr);
5607 #endif
5608   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5609                                      "MatStoreValues_MPIAIJ",
5610                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5612                                      "MatRetrieveValues_MPIAIJ",
5613                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5614   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5615 				     "MatGetDiagonalBlock_MPIAIJ",
5616                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5617   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5618 				     "MatIsTranspose_MPIAIJ",
5619 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5620   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5621 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5622 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5623   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5624 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5625 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5626   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5627 				     "MatDiagonalScaleLocal_MPIAIJ",
5628 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5629   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5630                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5631                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5632   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5633                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5634                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5635   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5636                                      "MatConvert_MPIAIJ_MPISBAIJ",
5637                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5638   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5639                                      "MatMatMult_MPIDense_MPIAIJ",
5640                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5641   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5642                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5643                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5644   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5645                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5646                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5647   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5648   PetscFunctionReturn(0);
5649 }
5650 EXTERN_C_END
5651 
5652 #undef __FUNCT__
5653 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5654 /*@
5655      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5656          and "off-diagonal" parts of the matrix in CSR format.
5657 
5658    Collective on MPI_Comm
5659 
5660    Input Parameters:
5661 +  comm - MPI communicator
5662 .  m - number of local rows (Cannot be PETSC_DECIDE)
5663 .  n - This value should be the same as the local size used in creating the
5664        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5665        calculated if N is given). For square matrices n is almost always m.
5666 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5667 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5668 .   i - row indices for "diagonal" portion of matrix
5669 .   j - column indices
5670 .   a - matrix values
5671 .   oi - row indices for "off-diagonal" portion of matrix
5672 .   oj - column indices
5673 -   oa - matrix values
5674 
5675    Output Parameter:
5676 .   mat - the matrix
5677 
5678    Level: advanced
5679 
5680    Notes:
5681        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5682        must free the arrays once the matrix has been destroyed, and not before.
5683 
5684        The i and j indices are 0 based
5685        The i, j, oi, and oj indices are 0 based
5686        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5687 
5688        This sets local rows and cannot be used to set off-processor values.
5689 
5690        You cannot later use MatSetValues() to change values in this matrix.
5691 
5692 .keywords: matrix, aij, compressed row, sparse, parallel
5693 
5694 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5695           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5696 @*/
5697 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5698 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5699 {
5700   PetscErrorCode ierr;
5701   Mat_MPIAIJ     *maij;
5702 
5703   PetscFunctionBegin;
5704   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5705   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5706   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5707   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5708   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5709   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5710   maij = (Mat_MPIAIJ*) (*mat)->data;
5711   maij->donotstash     = PETSC_TRUE;
5712   (*mat)->preallocated = PETSC_TRUE;
5713 
5714   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5715   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5716 
5717   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5718   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5719 
5720   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5721   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5722   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5723   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5724 
5725   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5726   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5727   PetscFunctionReturn(0);
5728 }
5729 
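/*
   A two-process usage sketch for the 2x2 matrix [1 2; 3 4], one row and one column per process
   (an illustration only, not taken from the PETSc reference documentation).  Note that, matching
   the sequential blocks created above, the diagonal part uses column indices local to the diagonal
   block while the off-diagonal part uses global column indices:

     rank 0:  PetscInt    i[]  = {0,1}, j[]  = {0}, oi[] = {0,1}, oj[] = {1};
              PetscScalar a[]  = {1.0}, oa[] = {2.0};
     rank 1:  PetscInt    i[]  = {0,1}, j[]  = {0}, oi[] = {0,1}, oj[] = {0};
              PetscScalar a[]  = {4.0}, oa[] = {3.0};
     both:    ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);

   The six arrays must remain valid until mat has been destroyed.
*/
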
5730 /*
5731     Special version of MatSetValues() for MPIAIJ matrices, intended for direct calls from Fortran
5732 */
5733 #include <petsc-private/fortranimpl.h>
5734 
5735 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5736 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5737 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5738 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5739 #endif
5740 
5741 /* Change these macros so they can be used in a void function */
5742 #undef CHKERRQ
5743 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5744 #undef SETERRQ2
5745 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5746 #undef SETERRQ3
5747 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5748 #undef SETERRQ
5749 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5750 
5751 EXTERN_C_BEGIN
5752 #undef __FUNCT__
5753 #define __FUNCT__ "matsetvaluesmpiaij_"
5754 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5755 {
5756   Mat             mat = *mmat;
5757   PetscInt        m = *mm, n = *mn;
5758   InsertMode      addv = *maddv;
5759   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5760   PetscScalar     value;
5761   PetscErrorCode  ierr;
5762 
5763   MatCheckPreallocated(mat,1);
5764   if (mat->insertmode == NOT_SET_VALUES) {
5765     mat->insertmode = addv;
5766   }
5767 #if defined(PETSC_USE_DEBUG)
5768   else if (mat->insertmode != addv) {
5769     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5770   }
5771 #endif
5772   {
5773   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5774   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5775   PetscBool       roworiented = aij->roworiented;
5776 
5777   /* Some Variables required in the macro */
5778   Mat             A = aij->A;
5779   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5780   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5781   MatScalar       *aa = a->a;
5782   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5783   Mat             B = aij->B;
5784   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5785   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5786   MatScalar       *ba = b->a;
5787 
5788   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5789   PetscInt        nonew = a->nonew;
5790   MatScalar       *ap1,*ap2;
5791 
5792   PetscFunctionBegin;
5793   for (i=0; i<m; i++) {
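  /* Each (im[i],in[j]) entry goes into the diagonal block A when both the row and the column are
     owned by this process, into the off-diagonal block B when only the row is local, and into the
     stash (for communication at assembly time) when the row belongs to another process. */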
5794     if (im[i] < 0) continue;
5795 #if defined(PETSC_USE_DEBUG)
5796     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5797 #endif
5798     if (im[i] >= rstart && im[i] < rend) {
5799       row      = im[i] - rstart;
5800       lastcol1 = -1;
5801       rp1      = aj + ai[row];
5802       ap1      = aa + ai[row];
5803       rmax1    = aimax[row];
5804       nrow1    = ailen[row];
5805       low1     = 0;
5806       high1    = nrow1;
5807       lastcol2 = -1;
5808       rp2      = bj + bi[row];
5809       ap2      = ba + bi[row];
5810       rmax2    = bimax[row];
5811       nrow2    = bilen[row];
5812       low2     = 0;
5813       high2    = nrow2;
5814 
5815       for (j=0; j<n; j++) {
5816         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5817         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5818         if (in[j] >= cstart && in[j] < cend){
5819           col = in[j] - cstart;
5820           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5821         } else if (in[j] < 0) continue;
5822 #if defined(PETSC_USE_DEBUG)
5823         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5824 #endif
5825         else {
5826           if (mat->was_assembled) {
5827             if (!aij->colmap) {
5828               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5829             }
5830 #if defined (PETSC_USE_CTABLE)
5831             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5832 	    col--;
5833 #else
5834             col = aij->colmap[in[j]] - 1;
5835 #endif
5836             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5837               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5838               col =  in[j];
5839               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5840               B = aij->B;
5841               b = (Mat_SeqAIJ*)B->data;
5842               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5843               rp2      = bj + bi[row];
5844               ap2      = ba + bi[row];
5845               rmax2    = bimax[row];
5846               nrow2    = bilen[row];
5847               low2     = 0;
5848               high2    = nrow2;
5849               bm       = aij->B->rmap->n;
5850               ba = b->a;
5851             }
5852           } else col = in[j];
5853           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5854         }
5855       }
5856     } else {
5857       if (!aij->donotstash) {
5858         if (roworiented) {
5859           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5860         } else {
5861           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5862         }
5863       }
5864     }
5865   }}
5866   PetscFunctionReturnVoid();
5867 }
5868 EXTERN_C_END
5869 
5870