xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4e17fbf5bafa47185df4021e6c4f5fb738d34ecb)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to
18    using inodes when enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
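/*
   Example (a minimal sketch, not part of this source file): the usage recommended in the note
   above, creating an AIJ matrix and calling both preallocation routines so the same code works
   whether `comm` has one process or many.  The names `comm`, `M`, `N` and the nonzero
   estimates 5 and 2 are placeholders.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);               (takes effect on a single process)
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);  (takes effect on several processes)
     ... MatSetValues() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/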
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
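/*
   Example (a minimal sketch, not part of this source file): selecting the "aijcrl" format at
   runtime through the options database key documented above; `comm`, `M`, `N` and the nonzero
   estimates are placeholders, and the executable name is hypothetical.

     ./app -mat_type aijcrl

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);                       (default, overridden by the option below)
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);                       (picks up -mat_type aijcrl)
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/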
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 
178     Only for square matrices
179 */
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
204     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
205     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
206     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
207     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
208     rowners[0] = 0;
209     for (i=2; i<=size; i++) {
210       rowners[i] += rowners[i-1];
211     }
212     rstart = rowners[rank];
213     rend   = rowners[rank+1];
214     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
215     if (!rank) {
216       gmata = (Mat_SeqAIJ*) gmat->data;
217       /* send row lengths to the other processors */
218       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
219       for (i=1; i<size; i++) {
220 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222       /* determine the number of diagonal and off-diagonal nonzeros in each row */
223       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
224       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
225       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
226       jj = 0;
227       for (i=0; i<m; i++) {
228 	for (j=0; j<dlens[i]; j++) {
229           if (gmata->j[jj] < rstart) ld[i]++;
230 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
231 	  jj++;
232 	}
233       }
234       /* send column indices to other processes */
235       for (i=1; i<size; i++) {
236 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
237 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
238 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
239       }
240 
241       /* send numerical values to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
245       }
246       gmataa = gmata->a;
247       gmataj = gmata->j;
248 
249     } else {
250       /* receive row lengths */
251       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
252       /* receive column indices */
253       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
254       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
255       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       /* determine the number of diagonal and off-diagonal nonzeros in each row */
257       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
258       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
259       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
260       jj = 0;
261       for (i=0; i<m; i++) {
262 	for (j=0; j<dlens[i]; j++) {
263           if (gmataj[jj] < rstart) ld[i]++;
264 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
265 	  jj++;
266 	}
267       }
268       /* receive numerical values */
269       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
270       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
271     }
272     /* set preallocation */
273     for (i=0; i<m; i++) {
274       dlens[i] -= olens[i];
275     }
276     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
277     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
278 
279     for (i=0; i<m; i++) {
280       dlens[i] += olens[i];
281     }
282     cnt  = 0;
283     for (i=0; i<m; i++) {
284       row  = rstart + i;
285       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
286       cnt += dlens[i];
287     }
288     if (rank) {
289       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
290     }
291     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
292     ierr = PetscFree(rowners);CHKERRQ(ierr);
293     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
294     *inmat = mat;
295   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
296     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
297     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
298     mat   = *inmat;
299     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
300     if (!rank) {
301       /* send numerical values to other processes */
302       gmata = (Mat_SeqAIJ*) gmat->data;
303       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
304       gmataa = gmata->a;
305       for (i=1; i<size; i++) {
306         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
307         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
308       }
309       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
310     } else {
311       /* receive numerical values from process 0*/
312       nz   = Ad->nz + Ao->nz;
313       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
314       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
315     }
316     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
317     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
318     ad = Ad->a;
319     ao = Ao->a;
320     if (mat->rmap->n) {
321       i  = 0;
322       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
323       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
324     }
325     for (i=1; i<mat->rmap->n; i++) {
326       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
327       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
328     }
329     i--;
330     if (mat->rmap->n) {
331       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332     }
333     if (rank) {
334       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
335     }
336   }
337   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
338   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
339   CHKMEMQ;
340   PetscFunctionReturn(0);
341 }
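/*
   Example (a hypothetical sketch, not part of this source file): one way the routine above
   might be driven.  `gmat` is a square MATSEQAIJ matrix whose entries are significant only on
   process 0, every process passes its desired number of local rows `m`, and the result is the
   parallel matrix `dist`.  A second call with MAT_REUSE_MATRIX keeps the nonzero pattern of
   `dist` and only moves the numerical values from process 0 again.

     Mat dist;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ... modify the values (but not the pattern) of gmat on process 0 ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/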
342 
343 /*
344   Local utility routine that creates a mapping from the global column
345 number to the local number in the off-diagonal part of the local
346 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
347 a slightly higher hash table cost; without it, it is not scalable (each processor
348 has an order-N integer array) but access is fast.
349 */
350 #undef __FUNCT__
351 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
352 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
353 {
354   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
355   PetscErrorCode ierr;
356   PetscInt       n = aij->B->cmap->n,i;
357 
358   PetscFunctionBegin;
359   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
360 #if defined (PETSC_USE_CTABLE)
361   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
362   for (i=0; i<n; i++){
363     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
364   }
365 #else
366   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
367   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
368   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
369   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
370 #endif
371   PetscFunctionReturn(0);
372 }
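/*
   Example: the lookup pattern used elsewhere in this file (e.g. MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ()) to consult the colmap built above; a global column `gcol` (a
   placeholder name) is mapped to a local column of the off-diagonal block, and a result < 0
   means the column is not present locally.  Entries are stored shifted by one so that 0 can
   mean "absent".

     PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/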
373 
374 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
375 { \
376     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
377     lastcol1 = col;\
378     while (high1-low1 > 5) { \
379       t = (low1+high1)/2; \
380       if (rp1[t] > col) high1 = t; \
381       else             low1  = t; \
382     } \
383       for (_i=low1; _i<high1; _i++) { \
384         if (rp1[_i] > col) break; \
385         if (rp1[_i] == col) { \
386           if (addv == ADD_VALUES) ap1[_i] += value;   \
387           else                    ap1[_i] = value; \
388           goto a_noinsert; \
389         } \
390       }  \
391       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
392       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
393       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
394       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
395       N = nrow1++ - 1; a->nz++; high1++; \
396       /* shift up all the later entries in this row */ \
397       for (ii=N; ii>=_i; ii--) { \
398         rp1[ii+1] = rp1[ii]; \
399         ap1[ii+1] = ap1[ii]; \
400       } \
401       rp1[_i] = col;  \
402       ap1[_i] = value;  \
403       a_noinsert: ; \
404       ailen[row] = nrow1; \
405 }
406 
407 
408 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
409 { \
410     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
411     lastcol2 = col;\
412     while (high2-low2 > 5) { \
413       t = (low2+high2)/2; \
414       if (rp2[t] > col) high2 = t; \
415       else             low2  = t; \
416     } \
417     for (_i=low2; _i<high2; _i++) {		\
418       if (rp2[_i] > col) break;			\
419       if (rp2[_i] == col) {			      \
420 	if (addv == ADD_VALUES) ap2[_i] += value;     \
421 	else                    ap2[_i] = value;      \
422 	goto b_noinsert;			      \
423       }						      \
424     }							      \
425     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
426     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
427     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
428     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
429     N = nrow2++ - 1; b->nz++; high2++;					\
430     /* shift up all the later entries in this row */			\
431     for (ii=N; ii>=_i; ii--) {						\
432       rp2[ii+1] = rp2[ii];						\
433       ap2[ii+1] = ap2[ii];						\
434     }									\
435     rp2[_i] = col;							\
436     ap2[_i] = value;							\
437     b_noinsert: ;								\
438     bilen[row] = nrow2;							\
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
443 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
444 {
445   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
446   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
447   PetscErrorCode ierr;
448   PetscInt       l,*garray = mat->garray,diag;
449 
450   PetscFunctionBegin;
451   /* code only works for square matrices A */
452 
453   /* find size of row to the left of the diagonal part */
454   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
455   row  = row - diag;
456   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
457     if (garray[b->j[b->i[row]+l]] > diag) break;
458   }
459   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
460 
461   /* diagonal part */
462   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
463 
464   /* right of diagonal part */
465   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
466   PetscFunctionReturn(0);
467 }
468 
469 #undef __FUNCT__
470 #define __FUNCT__ "MatSetValues_MPIAIJ"
471 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
472 {
473   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
474   PetscScalar    value;
475   PetscErrorCode ierr;
476   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
477   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
478   PetscBool      roworiented = aij->roworiented;
479 
480   /* Some Variables required in the macro */
481   Mat            A = aij->A;
482   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
483   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
484   MatScalar      *aa = a->a;
485   PetscBool      ignorezeroentries = a->ignorezeroentries;
486   Mat            B = aij->B;
487   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
488   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
489   MatScalar      *ba = b->a;
490 
491   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
492   PetscInt       nonew;
493   MatScalar      *ap1,*ap2;
494 
495   PetscFunctionBegin;
496   if (v) PetscValidScalarPointer(v,6);
497   for (i=0; i<m; i++) {
498     if (im[i] < 0) continue;
499 #if defined(PETSC_USE_DEBUG)
500     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
501 #endif
502     if (im[i] >= rstart && im[i] < rend) {
503       row      = im[i] - rstart;
504       lastcol1 = -1;
505       rp1      = aj + ai[row];
506       ap1      = aa + ai[row];
507       rmax1    = aimax[row];
508       nrow1    = ailen[row];
509       low1     = 0;
510       high1    = nrow1;
511       lastcol2 = -1;
512       rp2      = bj + bi[row];
513       ap2      = ba + bi[row];
514       rmax2    = bimax[row];
515       nrow2    = bilen[row];
516       low2     = 0;
517       high2    = nrow2;
518 
519       for (j=0; j<n; j++) {
520         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
521         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
522         if (in[j] >= cstart && in[j] < cend){
523           col = in[j] - cstart;
524           nonew = a->nonew;
525           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
526         } else if (in[j] < 0) continue;
527 #if defined(PETSC_USE_DEBUG)
528         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
529 #endif
530         else {
531           if (mat->was_assembled) {
532             if (!aij->colmap) {
533               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
534             }
535 #if defined (PETSC_USE_CTABLE)
536             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
537 	    col--;
538 #else
539             col = aij->colmap[in[j]] - 1;
540 #endif
541             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
542               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
543               col =  in[j];
544               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
545               B = aij->B;
546               b = (Mat_SeqAIJ*)B->data;
547               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
548               rp2      = bj + bi[row];
549               ap2      = ba + bi[row];
550               rmax2    = bimax[row];
551               nrow2    = bilen[row];
552               low2     = 0;
553               high2    = nrow2;
554               bm       = aij->B->rmap->n;
555               ba = b->a;
556             }
557           } else col = in[j];
558           nonew = b->nonew;
559           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
560         }
561       }
562     } else {
563       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
564       if (!aij->donotstash) {
565         mat->assembled = PETSC_FALSE;
566         if (roworiented) {
567           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
568         } else {
569           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
570         }
571       }
572     }
573   }
574   PetscFunctionReturn(0);
575 }
576 
577 #undef __FUNCT__
578 #define __FUNCT__ "MatGetValues_MPIAIJ"
579 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
580 {
581   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
582   PetscErrorCode ierr;
583   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
584   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
585 
586   PetscFunctionBegin;
587   for (i=0; i<m; i++) {
588     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
589     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
590     if (idxm[i] >= rstart && idxm[i] < rend) {
591       row = idxm[i] - rstart;
592       for (j=0; j<n; j++) {
593         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
594         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
595         if (idxn[j] >= cstart && idxn[j] < cend){
596           col = idxn[j] - cstart;
597           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
598         } else {
599           if (!aij->colmap) {
600             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
601           }
602 #if defined (PETSC_USE_CTABLE)
603           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
604           col --;
605 #else
606           col = aij->colmap[idxn[j]] - 1;
607 #endif
608           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
609           else {
610             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611           }
612         }
613       }
614     } else {
615       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
616     }
617   }
618   PetscFunctionReturn(0);
619 }
620 
621 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
622 
623 #undef __FUNCT__
624 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
625 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
626 {
627   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
628   PetscErrorCode ierr;
629   PetscInt       nstash,reallocs;
630   InsertMode     addv;
631 
632   PetscFunctionBegin;
633   if (aij->donotstash || mat->nooffprocentries) {
634     PetscFunctionReturn(0);
635   }
636 
637   /* make sure all processors are either in INSERTMODE or ADDMODE */
638   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
639   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
640   mat->insertmode = addv; /* in case this processor had no cache */
641 
642   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
643   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
644   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
645   PetscFunctionReturn(0);
646 }
647 
648 #undef __FUNCT__
649 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
650 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
651 {
652   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
653   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
654   PetscErrorCode ierr;
655   PetscMPIInt    n;
656   PetscInt       i,j,rstart,ncols,flg;
657   PetscInt       *row,*col;
658   PetscBool      other_disassembled;
659   PetscScalar    *val;
660   InsertMode     addv = mat->insertmode;
661 
662   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
663   PetscFunctionBegin;
664   if (!aij->donotstash && !mat->nooffprocentries) {
665     while (1) {
666       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
667       if (!flg) break;
668 
669       for (i=0; i<n;) {
670         /* Now identify the consecutive vals belonging to the same row */
671         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
672         if (j < n) ncols = j-i;
673         else       ncols = n-i;
674         /* Now assemble all these values with a single function call */
675         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
676         i = j;
677       }
678     }
679     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
680   }
681   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
682   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
683 
684   /* determine if any processor has disassembled; if so we must
685      also disassemble ourselves so that we may reassemble. */
686   /*
687      if the nonzero structure of submatrix B cannot change then we know that
688      no processor disassembled, thus we can skip this stuff
689   */
690   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
691     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
692     if (mat->was_assembled && !other_disassembled) {
693       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
694     }
695   }
696   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
697     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
698   }
699   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
700   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
701   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
702   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
703 
704   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
705   aij->rowvalues = 0;
706 
707   /* used by MatAXPY() */
708   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
709   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
710 
711   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
712   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
718 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
719 {
720   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
721   PetscErrorCode ierr;
722 
723   PetscFunctionBegin;
724   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
725   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatZeroRows_MPIAIJ"
731 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
732 {
733   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
734   PetscErrorCode    ierr;
735   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
736   PetscInt          i,*owners = A->rmap->range;
737   PetscInt          *nprocs,j,idx,nsends,row;
738   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
739   PetscInt          *rvalues,count,base,slen,*source;
740   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
741   MPI_Comm          comm = ((PetscObject)A)->comm;
742   MPI_Request       *send_waits,*recv_waits;
743   MPI_Status        recv_status,*send_status;
744   const PetscScalar *xx;
745   PetscScalar       *bb;
746 #if defined(PETSC_DEBUG)
747   PetscBool      found = PETSC_FALSE;
748 #endif
749 
750   PetscFunctionBegin;
751   /*  first count number of contributors to each processor */
752   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
753   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
754   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
755   j = 0;
756   for (i=0; i<N; i++) {
757     if (lastidx > (idx = rows[i])) j = 0;
758     lastidx = idx;
759     for (; j<size; j++) {
760       if (idx >= owners[j] && idx < owners[j+1]) {
761         nprocs[2*j]++;
762         nprocs[2*j+1] = 1;
763         owner[i] = j;
764 #if defined(PETSC_DEBUG)
765         found = PETSC_TRUE;
766 #endif
767         break;
768       }
769     }
770 #if defined(PETSC_DEBUG)
771     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
772     found = PETSC_FALSE;
773 #endif
774   }
775   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
776 
777   if (A->nooffproczerorows) {
778     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
779     nrecvs = nsends;
780     nmax   = N;
781   } else {
782     /* inform other processors of number of messages and max length*/
783     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
784   }
785 
786   /* post receives:   */
787   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
788   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
789   for (i=0; i<nrecvs; i++) {
790     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
791   }
792 
793   /* do sends:
794       1) starts[i] gives the starting index in svalues for stuff going to
795          the ith processor
796   */
797   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
798   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
799   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
800   starts[0] = 0;
801   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
802   for (i=0; i<N; i++) {
803     svalues[starts[owner[i]]++] = rows[i];
804   }
805 
806   starts[0] = 0;
807   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
808   count = 0;
809   for (i=0; i<size; i++) {
810     if (nprocs[2*i+1]) {
811       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
812     }
813   }
814   ierr = PetscFree(starts);CHKERRQ(ierr);
815 
816   base = owners[rank];
817 
818   /*  wait on receives */
819   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
820   count  = nrecvs; slen = 0;
821   while (count) {
822     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
823     /* unpack receives into our local space */
824     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
825     source[imdex]  = recv_status.MPI_SOURCE;
826     lens[imdex]    = n;
827     slen          += n;
828     count--;
829   }
830   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
831 
832   /* move the data into the send scatter */
833   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
834   count = 0;
835   for (i=0; i<nrecvs; i++) {
836     values = rvalues + i*nmax;
837     for (j=0; j<lens[i]; j++) {
838       lrows[count++] = values[j] - base;
839     }
840   }
841   ierr = PetscFree(rvalues);CHKERRQ(ierr);
842   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
843   ierr = PetscFree(owner);CHKERRQ(ierr);
844   ierr = PetscFree(nprocs);CHKERRQ(ierr);
845 
846   /* fix right hand side if needed */
847   if (x && b) {
848     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
849     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
850     for (i=0; i<slen; i++) {
851       bb[lrows[i]] = diag*xx[lrows[i]];
852     }
853     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
854     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
855   }
856   /*
857         Zero the required rows. If the "diagonal block" of the matrix
858      is square and the user wishes to set the diagonal, we use separate
859      code so that MatSetValues() is not called for each diagonal entry, which would
860      allocate new memory with lots of mallocs and slow things down.
861 
862   */
863   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
864   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
865   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
866     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
867   } else if (diag != 0.0) {
868     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
869     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
870       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
871 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
872     }
873     for (i = 0; i < slen; i++) {
874       row  = lrows[i] + rstart;
875       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
876     }
877     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
878     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
879   } else {
880     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
881   }
882   ierr = PetscFree(lrows);CHKERRQ(ierr);
883 
884   /* wait on sends */
885   if (nsends) {
886     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
887     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
888     ierr = PetscFree(send_status);CHKERRQ(ierr);
889   }
890   ierr = PetscFree(send_waits);CHKERRQ(ierr);
891   ierr = PetscFree(svalues);CHKERRQ(ierr);
892   PetscFunctionReturn(0);
893 }
894 
895 #undef __FUNCT__
896 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
897 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
898 {
899   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
900   PetscErrorCode    ierr;
901   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
902   PetscInt          i,*owners = A->rmap->range;
903   PetscInt          *nprocs,j,idx,nsends;
904   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
905   PetscInt          *rvalues,count,base,slen,*source;
906   PetscInt          *lens,*lrows,*values,m;
907   MPI_Comm          comm = ((PetscObject)A)->comm;
908   MPI_Request       *send_waits,*recv_waits;
909   MPI_Status        recv_status,*send_status;
910   const PetscScalar *xx;
911   PetscScalar       *bb,*mask;
912   Vec               xmask,lmask;
913   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
914   const PetscInt    *aj, *ii,*ridx;
915   PetscScalar       *aa;
916 #if defined(PETSC_DEBUG)
917   PetscBool         found = PETSC_FALSE;
918 #endif
919 
920   PetscFunctionBegin;
921   /*  first count number of contributors to each processor */
922   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
923   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
924   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
925   j = 0;
926   for (i=0; i<N; i++) {
927     if (lastidx > (idx = rows[i])) j = 0;
928     lastidx = idx;
929     for (; j<size; j++) {
930       if (idx >= owners[j] && idx < owners[j+1]) {
931         nprocs[2*j]++;
932         nprocs[2*j+1] = 1;
933         owner[i] = j;
934 #if defined(PETSC_DEBUG)
935         found = PETSC_TRUE;
936 #endif
937         break;
938       }
939     }
940 #if defined(PETSC_DEBUG)
941     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
942     found = PETSC_FALSE;
943 #endif
944   }
945   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
946 
947   /* inform other processors of number of messages and max length*/
948   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
949 
950   /* post receives:   */
951   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
952   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
953   for (i=0; i<nrecvs; i++) {
954     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
955   }
956 
957   /* do sends:
958       1) starts[i] gives the starting index in svalues for stuff going to
959          the ith processor
960   */
961   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
963   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
964   starts[0] = 0;
965   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
966   for (i=0; i<N; i++) {
967     svalues[starts[owner[i]]++] = rows[i];
968   }
969 
970   starts[0] = 0;
971   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
972   count = 0;
973   for (i=0; i<size; i++) {
974     if (nprocs[2*i+1]) {
975       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
976     }
977   }
978   ierr = PetscFree(starts);CHKERRQ(ierr);
979 
980   base = owners[rank];
981 
982   /*  wait on receives */
983   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
984   count  = nrecvs; slen = 0;
985   while (count) {
986     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
987     /* unpack receives into our local space */
988     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
989     source[imdex]  = recv_status.MPI_SOURCE;
990     lens[imdex]    = n;
991     slen          += n;
992     count--;
993   }
994   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
995 
996   /* move the data into the send scatter */
997   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
998   count = 0;
999   for (i=0; i<nrecvs; i++) {
1000     values = rvalues + i*nmax;
1001     for (j=0; j<lens[i]; j++) {
1002       lrows[count++] = values[j] - base;
1003     }
1004   }
1005   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1006   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1007   ierr = PetscFree(owner);CHKERRQ(ierr);
1008   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1009   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1010 
1011   /* zero diagonal part of matrix */
1012   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1013 
1014   /* handle off diagonal part of matrix */
1015   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1016   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1017   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1018   for (i=0; i<slen; i++) {
1019     bb[lrows[i]] = 1;
1020   }
1021   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1022   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1024   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1025   if (x) {
1026     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1027     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1029     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1030   }
1031   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1032 
1033   /* remove zeroed rows of off diagonal matrix */
1034   ii = aij->i;
1035   for (i=0; i<slen; i++) {
1036     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1037   }
1038 
1039   /* loop over all elements of off process part of matrix zeroing removed columns*/
1040   if (aij->compressedrow.use){
1041     m    = aij->compressedrow.nrows;
1042     ii   = aij->compressedrow.i;
1043     ridx = aij->compressedrow.rindex;
1044     for (i=0; i<m; i++){
1045       n   = ii[i+1] - ii[i];
1046       aj  = aij->j + ii[i];
1047       aa  = aij->a + ii[i];
1048 
1049       for (j=0; j<n; j++) {
1050         if (PetscAbsScalar(mask[*aj])) {
1051           if (b) bb[*ridx] -= *aa*xx[*aj];
1052           *aa        = 0.0;
1053         }
1054         aa++;
1055         aj++;
1056       }
1057       ridx++;
1058     }
1059   } else { /* do not use compressed row format */
1060     m = l->B->rmap->n;
1061     for (i=0; i<m; i++) {
1062       n   = ii[i+1] - ii[i];
1063       aj  = aij->j + ii[i];
1064       aa  = aij->a + ii[i];
1065       for (j=0; j<n; j++) {
1066         if (PetscAbsScalar(mask[*aj])) {
1067           if (b) bb[i] -= *aa*xx[*aj];
1068           *aa    = 0.0;
1069         }
1070         aa++;
1071         aj++;
1072       }
1073     }
1074   }
1075   if (x) {
1076     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1077     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1078   }
1079   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1080   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1081   ierr = PetscFree(lrows);CHKERRQ(ierr);
1082 
1083   /* wait on sends */
1084   if (nsends) {
1085     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1086     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1087     ierr = PetscFree(send_status);CHKERRQ(ierr);
1088   }
1089   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1090   ierr = PetscFree(svalues);CHKERRQ(ierr);
1091 
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatMult_MPIAIJ"
1097 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1098 {
1099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1100   PetscErrorCode ierr;
1101   PetscInt       nt;
1102 
1103   PetscFunctionBegin;
1104   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1105   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1106   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1107   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1108   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1109   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1115 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1116 {
1117   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1118   PetscErrorCode ierr;
1119 
1120   PetscFunctionBegin;
1121   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1127 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1128 {
1129   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1130   PetscErrorCode ierr;
1131 
1132   PetscFunctionBegin;
1133   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1134   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1135   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1136   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1142 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1143 {
1144   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1145   PetscErrorCode ierr;
1146   PetscBool      merged;
1147 
1148   PetscFunctionBegin;
1149   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1150   /* do nondiagonal part */
1151   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1152   if (!merged) {
1153     /* send it on its way */
1154     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1155     /* do local part */
1156     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1157     /* receive remote parts: note this assumes the values are not actually */
1158     /* added into yy until the next line */
1159     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160   } else {
1161     /* do local part */
1162     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1163     /* send it on its way */
1164     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165     /* values actually were received in the Begin() but we need to call this nop */
1166     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1167   }
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 EXTERN_C_BEGIN
1172 #undef __FUNCT__
1173 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1174 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1175 {
1176   MPI_Comm       comm;
1177   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1178   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1179   IS             Me,Notme;
1180   PetscErrorCode ierr;
1181   PetscInt       M,N,first,last,*notme,i;
1182   PetscMPIInt    size;
1183 
1184   PetscFunctionBegin;
1185 
1186   /* Easy test: the diagonal blocks must be transposes of each other */
1187   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1188   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1189   if (!*f) PetscFunctionReturn(0);
1190   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1191   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1192   if (size == 1) PetscFunctionReturn(0);
1193 
1194   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1195   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1196   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1197   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1198   for (i=0; i<first; i++) notme[i] = i;
1199   for (i=last; i<M; i++) notme[i-last+first] = i;
1200   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1201   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1202   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1203   Aoff = Aoffs[0];
1204   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1205   Boff = Boffs[0];
1206   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1207   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1208   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1209   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1210   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1211   ierr = PetscFree(notme);CHKERRQ(ierr);
1212   PetscFunctionReturn(0);
1213 }
1214 EXTERN_C_END
1215 
1216 #undef __FUNCT__
1217 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1218 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1219 {
1220   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1221   PetscErrorCode ierr;
1222 
1223   PetscFunctionBegin;
1224   /* do nondiagonal part */
1225   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1226   /* send it on its way */
1227   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1228   /* do local part */
1229   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1230   /* receive remote parts */
1231   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 /*
1236   This only works correctly for square matrices where the subblock A->A is the
1237    diagonal block
1238 */
1239 #undef __FUNCT__
1240 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1241 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1242 {
1243   PetscErrorCode ierr;
1244   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1245 
1246   PetscFunctionBegin;
1247   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1248   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1249   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatScale_MPIAIJ"
1255 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1256 {
1257   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1262   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatDestroy_MPIAIJ"
1268 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1269 {
1270   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274 #if defined(PETSC_USE_LOG)
1275   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1276 #endif
1277   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1278   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1279   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1280   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1281 #if defined (PETSC_USE_CTABLE)
1282   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1283 #else
1284   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1285 #endif
1286   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1287   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1288   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1289   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1290   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1291   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1292 
1293   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1307 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1308 {
1309   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1310   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1311   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1312   PetscErrorCode    ierr;
1313   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1314   int               fd;
1315   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1316   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1317   PetscScalar       *column_values;
1318   PetscInt          message_count,flowcontrolcount;
1319   FILE              *file;
1320 
1321   PetscFunctionBegin;
1322   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1323   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1324   nz   = A->nz + B->nz;
1325   if (!rank) {
1326     header[0] = MAT_FILE_CLASSID;
1327     header[1] = mat->rmap->N;
1328     header[2] = mat->cmap->N;
1329     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1330     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1331     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1332     /* get largest number of rows any processor has */
1333     rlen = mat->rmap->n;
1334     range = mat->rmap->range;
1335     for (i=1; i<size; i++) {
1336       rlen = PetscMax(rlen,range[i+1] - range[i]);
1337     }
1338   } else {
1339     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340     rlen = mat->rmap->n;
1341   }
1342 
1343   /* load up the local row counts */
1344   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1345   for (i=0; i<mat->rmap->n; i++) {
1346     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1347   }
1348 
1349   /* store the row lengths to the file */
1350   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1351   if (!rank) {
1352     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1353     for (i=1; i<size; i++) {
1354       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1355       rlen = range[i+1] - range[i];
1356       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1357       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1358     }
1359     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1360   } else {
1361     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1362     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1363     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1364   }
1365   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1366 
1367   /* load up the local column indices */
1368   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
1369   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1370   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1371   cnt  = 0;
1372   for (i=0; i<mat->rmap->n; i++) {
1373     for (j=B->i[i]; j<B->i[i+1]; j++) {
1374       if ( (col = garray[B->j[j]]) > cstart) break;
1375       column_indices[cnt++] = col;
1376     }
1377     for (k=A->i[i]; k<A->i[i+1]; k++) {
1378       column_indices[cnt++] = A->j[k] + cstart;
1379     }
1380     for (; j<B->i[i+1]; j++) {
1381       column_indices[cnt++] = garray[B->j[j]];
1382     }
1383   }
1384   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1385 
1386   /* store the column indices to the file */
1387   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1388   if (!rank) {
1389     MPI_Status status;
1390     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1391     for (i=1; i<size; i++) {
1392       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1393       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1394       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1395       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1396       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1397     }
1398     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1399   } else {
1400     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1401     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1402     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1403     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1404   }
1405   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1406 
1407   /* load up the local column values */
1408   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1409   cnt  = 0;
1410   for (i=0; i<mat->rmap->n; i++) {
1411     for (j=B->i[i]; j<B->i[i+1]; j++) {
1412       if ( garray[B->j[j]] > cstart) break;
1413       column_values[cnt++] = B->a[j];
1414     }
1415     for (k=A->i[i]; k<A->i[i+1]; k++) {
1416       column_values[cnt++] = A->a[k];
1417     }
1418     for (; j<B->i[i+1]; j++) {
1419       column_values[cnt++] = B->a[j];
1420     }
1421   }
1422   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1423 
1424   /* store the column values to the file */
1425   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1426   if (!rank) {
1427     MPI_Status status;
1428     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1429     for (i=1; i<size; i++) {
1430       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1431       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1432       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1433       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1434       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1435     }
1436     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1437   } else {
1438     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1439     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1440     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1441     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1442   }
1443   ierr = PetscFree(column_values);CHKERRQ(ierr);
1444 
1445   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1446   if (file) {
1447     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1448   }
1449 
1450   PetscFunctionReturn(0);
1451 }
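
/*
   A minimal usage sketch, assuming an assembled MATMPIAIJ matrix A and the example file name
   "matrix.dat" (both illustrative, not taken from this file); the binary writer above is
   reached through MatView() with a binary viewer:

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/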
1452 
1453 #undef __FUNCT__
1454 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1455 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1456 {
1457   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1458   PetscErrorCode    ierr;
1459   PetscMPIInt       rank = aij->rank,size = aij->size;
1460   PetscBool         isdraw,iascii,isbinary;
1461   PetscViewer       sviewer;
1462   PetscViewerFormat format;
1463 
1464   PetscFunctionBegin;
1465   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1466   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1467   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1468   if (iascii) {
1469     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1470     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1471       MatInfo    info;
1472       PetscInt   *inodes;
1473 
1474       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1475       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1476       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,&inodes,PETSC_NULL);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1478       if (!inodes) {
1479         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1480 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1481       } else {
1482         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1483 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1484       }
1485       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1486       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1487       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1488       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1489       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1490       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1491       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1492       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1493       PetscFunctionReturn(0);
1494     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1495       PetscInt   inodecount,inodelimit,*inodes;
1496       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1497       if (inodes) {
1498         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1499       } else {
1500         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1501       }
1502       PetscFunctionReturn(0);
1503     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1504       PetscFunctionReturn(0);
1505     }
1506   } else if (isbinary) {
1507     if (size == 1) {
1508       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1509       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1510     } else {
1511       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1512     }
1513     PetscFunctionReturn(0);
1514   } else if (isdraw) {
1515     PetscDraw  draw;
1516     PetscBool  isnull;
1517     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1518     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1519   }
1520 
1521   if (size == 1) {
1522     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1523     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1524   } else {
1525     /* assemble the entire matrix onto first processor. */
1526     Mat         A;
1527     Mat_SeqAIJ  *Aloc;
1528     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1529     MatScalar   *a;
1530 
1531     if (mat->rmap->N > 1024) {
1532       PetscBool  flg = PETSC_FALSE;
1533 
1534       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1535       if (!flg) {
1536         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1537       }
1538     }
1539 
1540     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1541     if (!rank) {
1542       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1543     } else {
1544       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1545     }
1546     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1547     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1548     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1549     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1550     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1551 
1552     /* copy over the A part */
1553     Aloc = (Mat_SeqAIJ*)aij->A->data;
1554     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555     row = mat->rmap->rstart;
1556     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1557     for (i=0; i<m; i++) {
1558       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1559       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1560     }
1561     aj = Aloc->j;
1562     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1563 
1564     /* copy over the B part */
1565     Aloc = (Mat_SeqAIJ*)aij->B->data;
1566     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1567     row  = mat->rmap->rstart;
1568     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1569     ct   = cols;
1570     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1571     for (i=0; i<m; i++) {
1572       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1573       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1574     }
1575     ierr = PetscFree(ct);CHKERRQ(ierr);
1576     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1577     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1578     /*
1579        Everyone has to call to draw the matrix since the graphics waits are
1580        synchronized across all processors that share the PetscDraw object
1581     */
1582     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1583     if (!rank) {
1584       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1585       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1586       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1587       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1588     }
1589     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1590     ierr = MatDestroy(&A);CHKERRQ(ierr);
1591   }
1592   PetscFunctionReturn(0);
1593 }
1594 
1595 #undef __FUNCT__
1596 #define __FUNCT__ "MatView_MPIAIJ"
1597 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1598 {
1599   PetscErrorCode ierr;
1600   PetscBool      iascii,isdraw,issocket,isbinary;
1601 
1602   PetscFunctionBegin;
1603   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1604   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1605   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1606   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1607   if (iascii || isdraw || isbinary || issocket) {
1608     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1609   } else {
1610     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1611   }
1612   PetscFunctionReturn(0);
1613 }
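
/*
   A minimal usage sketch, assuming an assembled MATMPIAIJ matrix A (illustrative only): viewing
   with the ASCII info-detail format reports the per-process nonzero counts and the VecScatter
   used in the matrix-vector product, as implemented above:

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/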
1614 
1615 #undef __FUNCT__
1616 #define __FUNCT__ "MatSOR_MPIAIJ"
1617 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1618 {
1619   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1620   PetscErrorCode ierr;
1621   Vec            bb1 = 0;
1622   PetscBool      hasop;
1623 
1624   PetscFunctionBegin;
1625   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1626     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1627   }
1628 
1629   if (flag == SOR_APPLY_UPPER) {
1630     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1631     PetscFunctionReturn(0);
1632   }
1633 
1634   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1635     if (flag & SOR_ZERO_INITIAL_GUESS) {
1636       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1637       its--;
1638     }
1639 
1640     while (its--) {
1641       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1642       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1643 
1644       /* update rhs: bb1 = bb - B*x */
1645       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1646       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1647 
1648       /* local sweep */
1649       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1650     }
1651   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1652     if (flag & SOR_ZERO_INITIAL_GUESS) {
1653       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1654       its--;
1655     }
1656     while (its--) {
1657       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1658       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1659 
1660       /* update rhs: bb1 = bb - B*x */
1661       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1662       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1663 
1664       /* local sweep */
1665       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1666     }
1667   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1668     if (flag & SOR_ZERO_INITIAL_GUESS) {
1669       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1670       its--;
1671     }
1672     while (its--) {
1673       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1674       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1675 
1676       /* update rhs: bb1 = bb - B*x */
1677       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1678       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1679 
1680       /* local sweep */
1681       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1682     }
1683   }  else if (flag & SOR_EISENSTAT) {
1684     Vec         xx1;
1685 
1686     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1687     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1688 
1689     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1690     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1691     if (!mat->diag) {
1692       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1693       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1694     }
1695     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1696     if (hasop) {
1697       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1698     } else {
1699       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1700     }
1701     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1702 
1703     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1704 
1705     /* local sweep */
1706     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1707     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1708     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1709   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1710 
1711   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1712   PetscFunctionReturn(0);
1713 }
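
/*
   A minimal usage sketch (illustrative, assuming a KSP object ksp already set up with this
   matrix): only processor-local sweeps and the Eisenstat variant are supported above, so in
   parallel PCSOR behaves like block Jacobi with SOR on each block; a local symmetric sweep
   can be requested explicitly:

     PC pc;
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
*/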
1714 
1715 #undef __FUNCT__
1716 #define __FUNCT__ "MatPermute_MPIAIJ"
1717 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1718 {
1719   MPI_Comm       comm;
1720   PetscInt       first,local_rowsize,local_colsize;
1721   const PetscInt *rows;
1722   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1723   PetscErrorCode ierr;
1724 
1725   PetscFunctionBegin;
1726   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1727   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1728   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1729   /* collect the global row permutation and invert it */
1730   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1731   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1732   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1733   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1734   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1735   /* get the local target indices */
1736   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1737   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1738   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1739   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1740   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1741   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1742   /* the column permutation is so much easier;
1743      make a local version of 'colp' and invert it */
1744   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1745   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1746   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1747   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1748   /* now we just get the submatrix */
1749   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1750   /* clean up */
1751   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1752   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1753   PetscFunctionReturn(0);
1754 }
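
/*
   A minimal usage sketch, assuming A is an assembled MATMPIAIJ matrix and rowp/colp are
   permutation index sets (they may be per-process serial index sets, as noted in the code
   above):

     Mat Aperm;
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/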
1755 
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1758 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1759 {
1760   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1761   Mat            A = mat->A,B = mat->B;
1762   PetscErrorCode ierr;
1763   PetscReal      isend[5],irecv[5];
1764 
1765   PetscFunctionBegin;
1766   info->block_size     = 1.0;
1767   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1768   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1769   isend[3] = info->memory;  isend[4] = info->mallocs;
1770   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1771   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1772   isend[3] += info->memory;  isend[4] += info->mallocs;
1773   if (flag == MAT_LOCAL) {
1774     info->nz_used      = isend[0];
1775     info->nz_allocated = isend[1];
1776     info->nz_unneeded  = isend[2];
1777     info->memory       = isend[3];
1778     info->mallocs      = isend[4];
1779   } else if (flag == MAT_GLOBAL_MAX) {
1780     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1781     info->nz_used      = irecv[0];
1782     info->nz_allocated = irecv[1];
1783     info->nz_unneeded  = irecv[2];
1784     info->memory       = irecv[3];
1785     info->mallocs      = irecv[4];
1786   } else if (flag == MAT_GLOBAL_SUM) {
1787     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1788     info->nz_used      = irecv[0];
1789     info->nz_allocated = irecv[1];
1790     info->nz_unneeded  = irecv[2];
1791     info->memory       = irecv[3];
1792     info->mallocs      = irecv[4];
1793   }
1794   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1795   info->fill_ratio_needed = 0;
1796   info->factor_mallocs    = 0;
1797   PetscFunctionReturn(0);
1798 }
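
/*
   A minimal usage sketch (illustrative): the MAT_GLOBAL_SUM flag returns the reductions
   computed above over all processes in the matrix's communicator:

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/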
1799 
1800 #undef __FUNCT__
1801 #define __FUNCT__ "MatSetOption_MPIAIJ"
1802 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1803 {
1804   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1805   PetscErrorCode ierr;
1806 
1807   PetscFunctionBegin;
1808   switch (op) {
1809   case MAT_NEW_NONZERO_LOCATIONS:
1810   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1811   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1812   case MAT_KEEP_NONZERO_PATTERN:
1813   case MAT_NEW_NONZERO_LOCATION_ERR:
1814   case MAT_USE_INODES:
1815   case MAT_IGNORE_ZERO_ENTRIES:
1816     MatCheckPreallocated(A,1);
1817     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1818     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1819     break;
1820   case MAT_ROW_ORIENTED:
1821     a->roworiented = flg;
1822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1823     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1824     break;
1825   case MAT_NEW_DIAGONALS:
1826     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1827     break;
1828   case MAT_IGNORE_OFF_PROC_ENTRIES:
1829     a->donotstash = flg;
1830     break;
1831   case MAT_SPD:
1832     A->spd_set                         = PETSC_TRUE;
1833     A->spd                             = flg;
1834     if (flg) {
1835       A->symmetric                     = PETSC_TRUE;
1836       A->structurally_symmetric        = PETSC_TRUE;
1837       A->symmetric_set                 = PETSC_TRUE;
1838       A->structurally_symmetric_set    = PETSC_TRUE;
1839     }
1840     break;
1841   case MAT_SYMMETRIC:
1842     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1843     break;
1844   case MAT_STRUCTURALLY_SYMMETRIC:
1845     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1846     break;
1847   case MAT_HERMITIAN:
1848     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1849     break;
1850   case MAT_SYMMETRY_ETERNAL:
1851     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1852     break;
1853   default:
1854     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1855   }
1856   PetscFunctionReturn(0);
1857 }
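
/*
   A minimal usage sketch (illustrative): the options handled above are set directly on the
   MPIAIJ matrix; for example, dropping entries destined for other processes during assembly
   and flagging unexpected new nonzero locations as errors:

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
*/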
1858 
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatGetRow_MPIAIJ"
1861 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1862 {
1863   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1864   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1865   PetscErrorCode ierr;
1866   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1867   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1868   PetscInt       *cmap,*idx_p;
1869 
1870   PetscFunctionBegin;
1871   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1872   mat->getrowactive = PETSC_TRUE;
1873 
1874   if (!mat->rowvalues && (idx || v)) {
1875     /*
1876         allocate enough space to hold information from the longest row.
1877     */
1878     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1879     PetscInt   max = 1,tmp;
1880     for (i=0; i<matin->rmap->n; i++) {
1881       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1882       if (max < tmp) { max = tmp; }
1883     }
1884     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1885   }
1886 
1887   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1888   lrow = row - rstart;
1889 
1890   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1891   if (!v)   {pvA = 0; pvB = 0;}
1892   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1893   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1894   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1895   nztot = nzA + nzB;
1896 
1897   cmap  = mat->garray;
1898   if (v  || idx) {
1899     if (nztot) {
1900       /* Sort by increasing column numbers, assuming A and B already sorted */
1901       PetscInt imark = -1;
1902       if (v) {
1903         *v = v_p = mat->rowvalues;
1904         for (i=0; i<nzB; i++) {
1905           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1906           else break;
1907         }
1908         imark = i;
1909         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1910         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1911       }
1912       if (idx) {
1913         *idx = idx_p = mat->rowindices;
1914         if (imark > -1) {
1915           for (i=0; i<imark; i++) {
1916             idx_p[i] = cmap[cworkB[i]];
1917           }
1918         } else {
1919           for (i=0; i<nzB; i++) {
1920             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1921             else break;
1922           }
1923           imark = i;
1924         }
1925         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1926         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1927       }
1928     } else {
1929       if (idx) *idx = 0;
1930       if (v)   *v   = 0;
1931     }
1932   }
1933   *nz = nztot;
1934   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1935   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1941 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1942 {
1943   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1944 
1945   PetscFunctionBegin;
1946   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1947   aij->getrowactive = PETSC_FALSE;
1948   PetscFunctionReturn(0);
1949 }
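
/*
   A minimal usage sketch (illustrative): only locally owned rows may be requested, and each
   MatGetRow() must be matched by a MatRestoreRow() before the next row is accessed:

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/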
1950 
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatNorm_MPIAIJ"
1953 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1954 {
1955   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1956   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1957   PetscErrorCode ierr;
1958   PetscInt       i,j,cstart = mat->cmap->rstart;
1959   PetscReal      sum = 0.0;
1960   MatScalar      *v;
1961 
1962   PetscFunctionBegin;
1963   if (aij->size == 1) {
1964     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1965   } else {
1966     if (type == NORM_FROBENIUS) {
1967       v = amat->a;
1968       for (i=0; i<amat->nz; i++) {
1969 #if defined(PETSC_USE_COMPLEX)
1970         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1971 #else
1972         sum += (*v)*(*v); v++;
1973 #endif
1974       }
1975       v = bmat->a;
1976       for (i=0; i<bmat->nz; i++) {
1977 #if defined(PETSC_USE_COMPLEX)
1978         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1979 #else
1980         sum += (*v)*(*v); v++;
1981 #endif
1982       }
1983       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1984       *norm = PetscSqrtReal(*norm);
1985     } else if (type == NORM_1) { /* max column norm */
1986       PetscReal *tmp,*tmp2;
1987       PetscInt  *jj,*garray = aij->garray;
1988       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1989       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1990       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1991       *norm = 0.0;
1992       v = amat->a; jj = amat->j;
1993       for (j=0; j<amat->nz; j++) {
1994         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1995       }
1996       v = bmat->a; jj = bmat->j;
1997       for (j=0; j<bmat->nz; j++) {
1998         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1999       }
2000       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2001       for (j=0; j<mat->cmap->N; j++) {
2002         if (tmp2[j] > *norm) *norm = tmp2[j];
2003       }
2004       ierr = PetscFree(tmp);CHKERRQ(ierr);
2005       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2006     } else if (type == NORM_INFINITY) { /* max row norm */
2007       PetscReal ntemp = 0.0;
2008       for (j=0; j<aij->A->rmap->n; j++) {
2009         v = amat->a + amat->i[j];
2010         sum = 0.0;
2011         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2012           sum += PetscAbsScalar(*v); v++;
2013         }
2014         v = bmat->a + bmat->i[j];
2015         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2016           sum += PetscAbsScalar(*v); v++;
2017         }
2018         if (sum > ntemp) ntemp = sum;
2019       }
2020       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2021     } else {
2022       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2023     }
2024   }
2025   PetscFunctionReturn(0);
2026 }
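
/*
   A minimal usage sketch (illustrative): the norms supported above are NORM_1 (max column sum),
   NORM_FROBENIUS, and NORM_INFINITY (max row sum), each computed with a single reduction:

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/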
2027 
2028 #undef __FUNCT__
2029 #define __FUNCT__ "MatTranspose_MPIAIJ"
2030 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2031 {
2032   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2033   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2034   PetscErrorCode ierr;
2035   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2036   PetscInt       cstart=A->cmap->rstart,ncol;
2037   Mat            B;
2038   MatScalar      *array;
2039 
2040   PetscFunctionBegin;
2041   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2042 
2043   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2044   ai = Aloc->i; aj = Aloc->j;
2045   bi = Bloc->i; bj = Bloc->j;
2046   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2047     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2048     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2049     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2050     for (i=0; i<ai[ma]; i++){
2051       d_nnz[aj[i]] ++;
2052       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2053     }
2054 
2055     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2056     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2057     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2058     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2059     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2060     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2061     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2062   } else {
2063     B = *matout;
2064     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2065     for (i=0; i<ai[ma]; i++){
2066       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2067     }
2068   }
2069 
2070   /* copy over the A part */
2071   array = Aloc->a;
2072   row = A->rmap->rstart;
2073   for (i=0; i<ma; i++) {
2074     ncol = ai[i+1]-ai[i];
2075     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2076     row++; array += ncol; aj += ncol;
2077   }
2078   aj = Aloc->j;
2079   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2080   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
2081   /* copy over the B part */
2082   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2083   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2084   array = Bloc->a;
2085   row = A->rmap->rstart;
2086   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2087   cols_tmp = cols;
2088   for (i=0; i<mb; i++) {
2089     ncol = bi[i+1]-bi[i];
2090     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2091     row++; array += ncol; cols_tmp += ncol;
2092   }
2093   ierr = PetscFree(cols);CHKERRQ(ierr);
2094 
2095   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2096   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2097   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2098     *matout = B;
2099   } else {
2100     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2101   }
2102   PetscFunctionReturn(0);
2103 }
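
/*
   A minimal usage sketch (illustrative): the out-of-place form creates a new matrix; the
   in-place form, MatTranspose(A,MAT_REUSE_MATRIX,&A), is only allowed for square matrices,
   as checked above:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/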
2104 
2105 #undef __FUNCT__
2106 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2107 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2108 {
2109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2110   Mat            a = aij->A,b = aij->B;
2111   PetscErrorCode ierr;
2112   PetscInt       s1,s2,s3;
2113 
2114   PetscFunctionBegin;
2115   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2116   if (rr) {
2117     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2118     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2119     /* Overlap communication with computation. */
2120     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2121   }
2122   if (ll) {
2123     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2124     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2125     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2126   }
2127   /* scale  the diagonal block */
2128   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2129 
2130   if (rr) {
2131     /* Do a scatter end and then right scale the off-diagonal block */
2132     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2133     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2134   }
2135 
2136   PetscFunctionReturn(0);
2137 }
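
/*
   A minimal usage sketch (illustrative): ll and rr are vectors conforming to the row and
   column layout of the matrix respectively, and either may be PETSC_NULL; the result is
   diag(ll)*A*diag(rr), with the off-diagonal scatter overlapped with computation as above:

     ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
*/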
2138 
2139 #undef __FUNCT__
2140 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2141 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2142 {
2143   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2144   PetscErrorCode ierr;
2145 
2146   PetscFunctionBegin;
2147   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatEqual_MPIAIJ"
2153 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2154 {
2155   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2156   Mat            a,b,c,d;
2157   PetscBool      flg;
2158   PetscErrorCode ierr;
2159 
2160   PetscFunctionBegin;
2161   a = matA->A; b = matA->B;
2162   c = matB->A; d = matB->B;
2163 
2164   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2165   if (flg) {
2166     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2167   }
2168   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2169   PetscFunctionReturn(0);
2170 }
2171 
2172 #undef __FUNCT__
2173 #define __FUNCT__ "MatCopy_MPIAIJ"
2174 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2175 {
2176   PetscErrorCode ierr;
2177   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2178   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2179 
2180   PetscFunctionBegin;
2181   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2182   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2183     /* because of the column compression in the off-processor part of the matrix a->B,
2184        the number of columns in a->B and b->B may be different, hence we cannot call
2185        the MatCopy() directly on the two parts. If need be, we can provide a more
2186        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2187        then copying the submatrices */
2188     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2189   } else {
2190     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2191     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2192   }
2193   PetscFunctionReturn(0);
2194 }
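
/*
   A minimal usage sketch (illustrative): the fast path above requires both matrices to have
   the same nonzero pattern, which is guaranteed when the destination is created by
   MatDuplicate():

     Mat B;
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/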
2195 
2196 #undef __FUNCT__
2197 #define __FUNCT__ "MatSetUp_MPIAIJ"
2198 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2199 {
2200   PetscErrorCode ierr;
2201 
2202   PetscFunctionBegin;
2203   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2204   PetscFunctionReturn(0);
2205 }
2206 
2207 #undef __FUNCT__
2208 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2209 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2210 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2211 {
2212   PetscInt          i,m=Y->rmap->N;
2213   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2214   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2215   const PetscInt    *xi = x->i,*yi = y->i;
2216 
2217   PetscFunctionBegin;
2218   /* Set the number of nonzeros in the new matrix */
2219   for(i=0; i<m; i++) {
2220     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2221     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2222     nnz[i] = 0;
2223     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2224       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2225       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2226       nnz[i]++;
2227     }
2228     for (; k<nzy; k++) nnz[i]++;
2229   }
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatAXPY_MPIAIJ"
2235 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   PetscInt       i;
2239   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2240   PetscBLASInt   bnz,one=1;
2241   Mat_SeqAIJ     *x,*y;
2242 
2243   PetscFunctionBegin;
2244   if (str == SAME_NONZERO_PATTERN) {
2245     PetscScalar alpha = a;
2246     x = (Mat_SeqAIJ *)xx->A->data;
2247     y = (Mat_SeqAIJ *)yy->A->data;
2248     bnz = PetscBLASIntCast(x->nz);
2249     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2250     x = (Mat_SeqAIJ *)xx->B->data;
2251     y = (Mat_SeqAIJ *)yy->B->data;
2252     bnz = PetscBLASIntCast(x->nz);
2253     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2254   } else if (str == SUBSET_NONZERO_PATTERN) {
2255     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2256 
2257     x = (Mat_SeqAIJ *)xx->B->data;
2258     y = (Mat_SeqAIJ *)yy->B->data;
2259     if (y->xtoy && y->XtoY != xx->B) {
2260       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2261       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2262     }
2263     if (!y->xtoy) { /* get xtoy */
2264       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2265       y->XtoY = xx->B;
2266       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2267     }
2268     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2269   } else {
2270     Mat B;
2271     PetscInt *nnz_d,*nnz_o;
2272     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2273     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2274     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2275     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2276     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2277     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2278     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2279     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2280     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2281     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2282     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2283     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2284     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2285     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2286   }
2287   PetscFunctionReturn(0);
2288 }
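
/*
   A minimal usage sketch (illustrative): Y = Y + a*X; the structure flag selects the branch
   above (SAME_NONZERO_PATTERN uses BLAS axpy on the stored values, DIFFERENT_NONZERO_PATTERN
   rebuilds Y with merged preallocation):

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/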
2289 
2290 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2291 
2292 #undef __FUNCT__
2293 #define __FUNCT__ "MatConjugate_MPIAIJ"
2294 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2295 {
2296 #if defined(PETSC_USE_COMPLEX)
2297   PetscErrorCode ierr;
2298   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2299 
2300   PetscFunctionBegin;
2301   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2302   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2303 #else
2304   PetscFunctionBegin;
2305 #endif
2306   PetscFunctionReturn(0);
2307 }
2308 
2309 #undef __FUNCT__
2310 #define __FUNCT__ "MatRealPart_MPIAIJ"
2311 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2312 {
2313   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2314   PetscErrorCode ierr;
2315 
2316   PetscFunctionBegin;
2317   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2318   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 #undef __FUNCT__
2323 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2324 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2325 {
2326   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2327   PetscErrorCode ierr;
2328 
2329   PetscFunctionBegin;
2330   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2331   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2332   PetscFunctionReturn(0);
2333 }
2334 
2335 #ifdef PETSC_HAVE_PBGL
2336 
2337 #include <boost/parallel/mpi/bsp_process_group.hpp>
2338 #include <boost/graph/distributed/ilu_default_graph.hpp>
2339 #include <boost/graph/distributed/ilu_0_block.hpp>
2340 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2341 #include <boost/graph/distributed/petsc/interface.hpp>
2342 #include <boost/multi_array.hpp>
2343 #include <boost/parallel/distributed_property_map.hpp>
2344 
2345 #undef __FUNCT__
2346 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2347 /*
2348   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2349 */
2350 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2351 {
2352   namespace petsc = boost::distributed::petsc;
2353 
2354   namespace graph_dist = boost::graph::distributed;
2355   using boost::graph::distributed::ilu_default::process_group_type;
2356   using boost::graph::ilu_permuted;
2357 
2358   PetscBool       row_identity, col_identity;
2359   PetscContainer  c;
2360   PetscInt        m, n, M, N;
2361   PetscErrorCode  ierr;
2362 
2363   PetscFunctionBegin;
2364   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2365   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2366   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2367   if (!row_identity || !col_identity) {
2368     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2369   }
2370 
2371   process_group_type pg;
2372   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2373   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2374   lgraph_type&   level_graph = *lgraph_p;
2375   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2376 
2377   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2378   ilu_permuted(level_graph);
2379 
2380   /* put together the new matrix */
2381   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2382   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2383   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2384   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2385   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2386   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2387   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2388   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2389 
2390   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2391   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2392   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2393   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 #undef __FUNCT__
2398 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2399 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2400 {
2401   PetscFunctionBegin;
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 #undef __FUNCT__
2406 #define __FUNCT__ "MatSolve_MPIAIJ"
2407 /*
2408   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2409 */
2410 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2411 {
2412   namespace graph_dist = boost::graph::distributed;
2413 
2414   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2415   lgraph_type*   lgraph_p;
2416   PetscContainer c;
2417   PetscErrorCode ierr;
2418 
2419   PetscFunctionBegin;
2420   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2421   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2422   ierr = VecCopy(b, x);CHKERRQ(ierr);
2423 
2424   PetscScalar* array_x;
2425   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2426   PetscInt sx;
2427   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2428 
2429   PetscScalar* array_b;
2430   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2431   PetscInt sb;
2432   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2433 
2434   lgraph_type&   level_graph = *lgraph_p;
2435   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2436 
2437   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2438   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2439                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2440 
2441   typedef boost::iterator_property_map<array_ref_type::iterator,
2442                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2443   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2444                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2445 
2446   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2447 
2448   PetscFunctionReturn(0);
2449 }
2450 #endif
2451 
2452 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2453   PetscInt       nzlocal,nsends,nrecvs;
2454   PetscMPIInt    *send_rank,*recv_rank;
2455   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2456   PetscScalar    *sbuf_a,**rbuf_a;
2457   PetscErrorCode (*Destroy)(Mat);
2458 } Mat_Redundant;
2459 
2460 #undef __FUNCT__
2461 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2462 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2463 {
2464   PetscErrorCode       ierr;
2465   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2466   PetscInt             i;
2467 
2468   PetscFunctionBegin;
2469   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2470   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2471   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2472   for (i=0; i<redund->nrecvs; i++){
2473     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2474     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2475   }
2476   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2477   ierr = PetscFree(redund);CHKERRQ(ierr);
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatDestroy_MatRedundant"
2483 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2484 {
2485   PetscErrorCode  ierr;
2486   PetscContainer  container;
2487   Mat_Redundant   *redund=PETSC_NULL;
2488 
2489   PetscFunctionBegin;
2490   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2491   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2492   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2493   A->ops->destroy = redund->Destroy;
2494   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2495   if (A->ops->destroy) {
2496     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2497   }
2498   PetscFunctionReturn(0);
2499 }
2500 
2501 #undef __FUNCT__
2502 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2503 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2504 {
2505   PetscMPIInt    rank,size;
2506   MPI_Comm       comm=((PetscObject)mat)->comm;
2507   PetscErrorCode ierr;
2508   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2509   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2510   PetscInt       *rowrange=mat->rmap->range;
2511   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2512   Mat            A=aij->A,B=aij->B,C=*matredundant;
2513   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2514   PetscScalar    *sbuf_a;
2515   PetscInt       nzlocal=a->nz+b->nz;
2516   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2517   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2518   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2519   MatScalar      *aworkA,*aworkB;
2520   PetscScalar    *vals;
2521   PetscMPIInt    tag1,tag2,tag3,imdex;
2522   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2523                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2524   MPI_Status     recv_status,*send_status;
2525   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2526   PetscInt       **rbuf_j=PETSC_NULL;
2527   PetscScalar    **rbuf_a=PETSC_NULL;
2528   Mat_Redundant  *redund=PETSC_NULL;
2529   PetscContainer container;
2530 
2531   PetscFunctionBegin;
2532   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2533   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2534 
2535   if (reuse == MAT_REUSE_MATRIX) {
2536     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2537     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2538     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2539     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2540     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2541     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2542     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2543     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2544 
2545     nsends    = redund->nsends;
2546     nrecvs    = redund->nrecvs;
2547     send_rank = redund->send_rank;
2548     recv_rank = redund->recv_rank;
2549     sbuf_nz   = redund->sbuf_nz;
2550     rbuf_nz   = redund->rbuf_nz;
2551     sbuf_j    = redund->sbuf_j;
2552     sbuf_a    = redund->sbuf_a;
2553     rbuf_j    = redund->rbuf_j;
2554     rbuf_a    = redund->rbuf_a;
2555   }
2556 
2557   if (reuse == MAT_INITIAL_MATRIX){
2558     PetscMPIInt  subrank,subsize;
2559     PetscInt     nleftover,np_subcomm;
2560     /* get the destination processors' id send_rank, nsends and nrecvs */
2561     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2562     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2563     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2564     np_subcomm = size/nsubcomm;
2565     nleftover  = size - nsubcomm*np_subcomm;
2566     nsends = 0; nrecvs = 0;
2567     for (i=0; i<size; i++){ /* i=rank*/
2568       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2569         send_rank[nsends] = i; nsends++;
2570         recv_rank[nrecvs++] = i;
2571       }
2572     }
2573     if (rank >= size - nleftover){/* this proc is a leftover processor */
2574       i = size-nleftover-1;
2575       j = 0;
2576       while (j < nsubcomm - nleftover){
2577         send_rank[nsends++] = i;
2578         i--; j++;
2579       }
2580     }
2581 
2582     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2583       for (i=0; i<nleftover; i++){
2584         recv_rank[nrecvs++] = size-nleftover+i;
2585       }
2586     }
2587 
2588     /* allocate sbuf_j, sbuf_a */
2589     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2590     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2591     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2592   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2593 
2594   /* copy mat's local entries into the buffers */
2595   if (reuse == MAT_INITIAL_MATRIX){
2596     rownz_max = 0;
2597     rptr = sbuf_j;
2598     cols = sbuf_j + rend-rstart + 1;
2599     vals = sbuf_a;
2600     rptr[0] = 0;
2601     for (i=0; i<rend-rstart; i++){
2602       row = i + rstart;
2603       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2604       ncols  = nzA + nzB;
2605       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2606       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2607       /* load the column indices for this row into cols */
2608       lwrite = 0;
2609       for (l=0; l<nzB; l++) {
2610         if ((ctmp = bmap[cworkB[l]]) < cstart){
2611           vals[lwrite]   = aworkB[l];
2612           cols[lwrite++] = ctmp;
2613         }
2614       }
2615       for (l=0; l<nzA; l++){
2616         vals[lwrite]   = aworkA[l];
2617         cols[lwrite++] = cstart + cworkA[l];
2618       }
2619       for (l=0; l<nzB; l++) {
2620         if ((ctmp = bmap[cworkB[l]]) >= cend){
2621           vals[lwrite]   = aworkB[l];
2622           cols[lwrite++] = ctmp;
2623         }
2624       }
2625       vals += ncols;
2626       cols += ncols;
2627       rptr[i+1] = rptr[i] + ncols;
2628       if (rownz_max < ncols) rownz_max = ncols;
2629     }
2630     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2631   } else { /* only copy matrix values into sbuf_a */
2632     rptr = sbuf_j;
2633     vals = sbuf_a;
2634     rptr[0] = 0;
2635     for (i=0; i<rend-rstart; i++){
2636       row = i + rstart;
2637       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2638       ncols  = nzA + nzB;
2639       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2640       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2641       lwrite = 0;
2642       for (l=0; l<nzB; l++) {
2643         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2644       }
2645       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2646       for (l=0; l<nzB; l++) {
2647         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2648       }
2649       vals += ncols;
2650       rptr[i+1] = rptr[i] + ncols;
2651     }
2652   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2653 
2654   /* send nzlocal to others, and recv other's nzlocal */
2655   /*--------------------------------------------------*/
2656   if (reuse == MAT_INITIAL_MATRIX){
2657     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2658     s_waits2 = s_waits3 + nsends;
2659     s_waits1 = s_waits2 + nsends;
2660     r_waits1 = s_waits1 + nsends;
2661     r_waits2 = r_waits1 + nrecvs;
2662     r_waits3 = r_waits2 + nrecvs;
2663   } else {
2664     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2665     r_waits3 = s_waits3 + nsends;
2666   }
2667 
2668   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2669   if (reuse == MAT_INITIAL_MATRIX){
2670     /* get new tags to keep the communication clean */
2671     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2672     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2673     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2674 
2675     /* post receives of other's nzlocal */
2676     for (i=0; i<nrecvs; i++){
2677       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2678     }
2679     /* send nzlocal to others */
2680     for (i=0; i<nsends; i++){
2681       sbuf_nz[i] = nzlocal;
2682       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2683     }
2684     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2685     count = nrecvs;
2686     while (count) {
2687       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2688       recv_rank[imdex] = recv_status.MPI_SOURCE;
2689       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2690       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2691 
2692       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2693       rbuf_nz[imdex] += i + 2;
2694       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2695       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2696       count--;
2697     }
2698     /* wait on sends of nzlocal */
2699     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2700     /* send mat->i,j to others, and recv from other's */
2701     /*------------------------------------------------*/
2702     for (i=0; i<nsends; i++){
2703       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2704       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2705     }
2706     /* wait on receives of mat->i,j */
2707     /*------------------------------*/
2708     count = nrecvs;
2709     while (count) {
2710       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2711       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2712       count--;
2713     }
2714     /* wait on sends of mat->i,j */
2715     /*---------------------------*/
2716     if (nsends) {
2717       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2718     }
2719   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2720 
2721   /* post receives, send and receive mat->a */
2722   /*----------------------------------------*/
2723   for (imdex=0; imdex<nrecvs; imdex++) {
2724     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2725   }
2726   for (i=0; i<nsends; i++){
2727     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2728   }
2729   count = nrecvs;
2730   while (count) {
2731     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2732     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2733     count--;
2734   }
2735   if (nsends) {
2736     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2737   }
2738 
2739   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2740 
2741   /* create redundant matrix */
2742   /*-------------------------*/
2743   if (reuse == MAT_INITIAL_MATRIX){
2744     /* compute rownz_max for preallocation */
2745     for (imdex=0; imdex<nrecvs; imdex++){
2746       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2747       rptr = rbuf_j[imdex];
2748       for (i=0; i<j; i++){
2749         ncols = rptr[i+1] - rptr[i];
2750         if (rownz_max < ncols) rownz_max = ncols;
2751       }
2752     }
2753 
2754     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2755     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2756     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2757     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2758     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2759     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2760   } else {
2761     C = *matredundant;
2762   }
2763 
2764   /* insert local matrix entries */
2765   rptr = sbuf_j;
2766   cols = sbuf_j + rend-rstart + 1;
2767   vals = sbuf_a;
2768   for (i=0; i<rend-rstart; i++){
2769     row   = i + rstart;
2770     ncols = rptr[i+1] - rptr[i];
2771     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2772     vals += ncols;
2773     cols += ncols;
2774   }
2775   /* insert received matrix entries */
2776   for (imdex=0; imdex<nrecvs; imdex++){
2777     rstart = rowrange[recv_rank[imdex]];
2778     rend   = rowrange[recv_rank[imdex]+1];
2779     rptr = rbuf_j[imdex];
2780     cols = rbuf_j[imdex] + rend-rstart + 1;
2781     vals = rbuf_a[imdex];
2782     for (i=0; i<rend-rstart; i++){
2783       row   = i + rstart;
2784       ncols = rptr[i+1] - rptr[i];
2785       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2786       vals += ncols;
2787       cols += ncols;
2788     }
2789   }
2790   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2791   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2792   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2793   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2794   if (reuse == MAT_INITIAL_MATRIX) {
2795     PetscContainer container;
2796     *matredundant = C;
2797     /* create a supporting struct and attach it to C for reuse */
2798     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2799     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2800     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2801     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2802     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2803     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2804 
2805     redund->nzlocal = nzlocal;
2806     redund->nsends  = nsends;
2807     redund->nrecvs  = nrecvs;
2808     redund->send_rank = send_rank;
2809     redund->recv_rank = recv_rank;
2810     redund->sbuf_nz = sbuf_nz;
2811     redund->rbuf_nz = rbuf_nz;
2812     redund->sbuf_j  = sbuf_j;
2813     redund->sbuf_a  = sbuf_a;
2814     redund->rbuf_j  = rbuf_j;
2815     redund->rbuf_a  = rbuf_a;
2816 
2817     redund->Destroy = C->ops->destroy;
2818     C->ops->destroy = MatDestroy_MatRedundant;
2819   }
2820   PetscFunctionReturn(0);
2821 }
2822 
2823 #undef __FUNCT__
2824 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2825 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2826 {
2827   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2828   PetscErrorCode ierr;
2829   PetscInt       i,*idxb = 0;
2830   PetscScalar    *va,*vb;
2831   Vec            vtmp;
2832 
2833   PetscFunctionBegin;
2834   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2835   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2836   if (idx) {
2837     for (i=0; i<A->rmap->n; i++) {
2838       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2839     }
2840   }
2841 
2842   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2843   if (idx) {
2844     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2845   }
2846   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2847   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2848 
2849   for (i=0; i<A->rmap->n; i++){
2850     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2851       va[i] = vb[i];
2852       if (idx) idx[i] = a->garray[idxb[i]];
2853     }
2854   }
2855 
2856   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2857   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2858   ierr = PetscFree(idxb);CHKERRQ(ierr);
2859   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2860   PetscFunctionReturn(0);
2861 }
2862 
2863 #undef __FUNCT__
2864 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2865 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2866 {
2867   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2868   PetscErrorCode ierr;
2869   PetscInt       i,*idxb = 0;
2870   PetscScalar    *va,*vb;
2871   Vec            vtmp;
2872 
2873   PetscFunctionBegin;
2874   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2875   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2876   if (idx) {
2877     for (i=0; i<A->rmap->n; i++) {
2878       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2879     }
2880   }
2881 
2882   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2883   if (idx) {
2884     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2885   }
2886   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2887   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2888 
2889   for (i=0; i<A->rmap->n; i++){
2890     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2891       va[i] = vb[i];
2892       if (idx) idx[i] = a->garray[idxb[i]];
2893     }
2894   }
2895 
2896   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2897   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2898   ierr = PetscFree(idxb);CHKERRQ(ierr);
2899   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2900   PetscFunctionReturn(0);
2901 }
2902 
2903 #undef __FUNCT__
2904 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2905 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2906 {
2907   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2908   PetscInt       n      = A->rmap->n;
2909   PetscInt       cstart = A->cmap->rstart;
2910   PetscInt      *cmap   = mat->garray;
2911   PetscInt      *diagIdx, *offdiagIdx;
2912   Vec            diagV, offdiagV;
2913   PetscScalar   *a, *diagA, *offdiagA;
2914   PetscInt       r;
2915   PetscErrorCode ierr;
2916 
2917   PetscFunctionBegin;
2918   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2919   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2920   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2921   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2922   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2923   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2924   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2925   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2926   for(r = 0; r < n; ++r) {
2927     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2928       a[r]   = diagA[r];
2929       idx[r] = cstart + diagIdx[r];
2930     } else {
2931       a[r]   = offdiagA[r];
2932       idx[r] = cmap[offdiagIdx[r]];
2933     }
2934   }
2935   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2936   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2937   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2938   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2939   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2940   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2941   PetscFunctionReturn(0);
2942 }
2943 
2944 #undef __FUNCT__
2945 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2946 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2947 {
2948   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2949   PetscInt       n      = A->rmap->n;
2950   PetscInt       cstart = A->cmap->rstart;
2951   PetscInt      *cmap   = mat->garray;
2952   PetscInt      *diagIdx, *offdiagIdx;
2953   Vec            diagV, offdiagV;
2954   PetscScalar   *a, *diagA, *offdiagA;
2955   PetscInt       r;
2956   PetscErrorCode ierr;
2957 
2958   PetscFunctionBegin;
2959   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2960   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2961   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2962   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2963   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2964   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2965   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2966   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2967   for(r = 0; r < n; ++r) {
2968     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2969       a[r]   = diagA[r];
2970       idx[r] = cstart + diagIdx[r];
2971     } else {
2972       a[r]   = offdiagA[r];
2973       idx[r] = cmap[offdiagIdx[r]];
2974     }
2975   }
2976   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2977   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2978   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2979   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2980   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2981   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2982   PetscFunctionReturn(0);
2983 }
2984 
2985 #undef __FUNCT__
2986 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2987 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2988 {
2989   PetscErrorCode ierr;
2990   Mat            *dummy;
2991 
2992   PetscFunctionBegin;
2993   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2994   *newmat = *dummy;
2995   ierr = PetscFree(dummy);CHKERRQ(ierr);
2996   PetscFunctionReturn(0);
2997 }
2998 
2999 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3000 
3001 #undef __FUNCT__
3002 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3003 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3004 {
3005   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3006   PetscErrorCode ierr;
3007 
3008   PetscFunctionBegin;
3009   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3010   PetscFunctionReturn(0);
3011 }
3012 
3013 #undef __FUNCT__
3014 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3015 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3016 {
3017   PetscErrorCode ierr;
3018   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3019 
3020   PetscFunctionBegin;
3021   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3022   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3023   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3024   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3025   PetscFunctionReturn(0);
3026 }
3027 
3028 /* -------------------------------------------------------------------*/
3029 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3030        MatGetRow_MPIAIJ,
3031        MatRestoreRow_MPIAIJ,
3032        MatMult_MPIAIJ,
3033 /* 4*/ MatMultAdd_MPIAIJ,
3034        MatMultTranspose_MPIAIJ,
3035        MatMultTransposeAdd_MPIAIJ,
3036 #ifdef PETSC_HAVE_PBGL
3037        MatSolve_MPIAIJ,
3038 #else
3039        0,
3040 #endif
3041        0,
3042        0,
3043 /*10*/ 0,
3044        0,
3045        0,
3046        MatSOR_MPIAIJ,
3047        MatTranspose_MPIAIJ,
3048 /*15*/ MatGetInfo_MPIAIJ,
3049        MatEqual_MPIAIJ,
3050        MatGetDiagonal_MPIAIJ,
3051        MatDiagonalScale_MPIAIJ,
3052        MatNorm_MPIAIJ,
3053 /*20*/ MatAssemblyBegin_MPIAIJ,
3054        MatAssemblyEnd_MPIAIJ,
3055        MatSetOption_MPIAIJ,
3056        MatZeroEntries_MPIAIJ,
3057 /*24*/ MatZeroRows_MPIAIJ,
3058        0,
3059 #ifdef PETSC_HAVE_PBGL
3060        0,
3061 #else
3062        0,
3063 #endif
3064        0,
3065        0,
3066 /*29*/ MatSetUp_MPIAIJ,
3067 #ifdef PETSC_HAVE_PBGL
3068        0,
3069 #else
3070        0,
3071 #endif
3072        0,
3073        0,
3074        0,
3075 /*34*/ MatDuplicate_MPIAIJ,
3076        0,
3077        0,
3078        0,
3079        0,
3080 /*39*/ MatAXPY_MPIAIJ,
3081        MatGetSubMatrices_MPIAIJ,
3082        MatIncreaseOverlap_MPIAIJ,
3083        MatGetValues_MPIAIJ,
3084        MatCopy_MPIAIJ,
3085 /*44*/ MatGetRowMax_MPIAIJ,
3086        MatScale_MPIAIJ,
3087        0,
3088        0,
3089        MatZeroRowsColumns_MPIAIJ,
3090 /*49*/ MatSetRandom_MPIAIJ,
3091        0,
3092        0,
3093        0,
3094        0,
3095 /*54*/ MatFDColoringCreate_MPIAIJ,
3096        0,
3097        MatSetUnfactored_MPIAIJ,
3098        0, /* MatPermute_MPIAIJ, impl currently broken */
3099        0,
3100 /*59*/ MatGetSubMatrix_MPIAIJ,
3101        MatDestroy_MPIAIJ,
3102        MatView_MPIAIJ,
3103        0,
3104        0,
3105 /*64*/ 0,
3106        0,
3107        0,
3108        0,
3109        0,
3110 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3111        MatGetRowMinAbs_MPIAIJ,
3112        0,
3113        MatSetColoring_MPIAIJ,
3114 #if defined(PETSC_HAVE_ADIC)
3115        MatSetValuesAdic_MPIAIJ,
3116 #else
3117        0,
3118 #endif
3119        MatSetValuesAdifor_MPIAIJ,
3120 /*75*/ MatFDColoringApply_AIJ,
3121        0,
3122        0,
3123        0,
3124        MatFindZeroDiagonals_MPIAIJ,
3125 /*80*/ 0,
3126        0,
3127        0,
3128 /*83*/ MatLoad_MPIAIJ,
3129        0,
3130        0,
3131        0,
3132        0,
3133        0,
3134 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3135        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3136        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3137        MatPtAP_Basic,
3138        MatPtAPSymbolic_MPIAIJ,
3139 /*94*/ MatPtAPNumeric_MPIAIJ,
3140        0,
3141        0,
3142        0,
3143        0,
3144 /*99*/ 0,
3145        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3146        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3147        MatConjugate_MPIAIJ,
3148        0,
3149 /*104*/MatSetValuesRow_MPIAIJ,
3150        MatRealPart_MPIAIJ,
3151        MatImaginaryPart_MPIAIJ,
3152        0,
3153        0,
3154 /*109*/0,
3155        MatGetRedundantMatrix_MPIAIJ,
3156        MatGetRowMin_MPIAIJ,
3157        0,
3158        0,
3159 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3160        0,
3161        0,
3162        0,
3163        0,
3164 /*119*/0,
3165        0,
3166        0,
3167        0,
3168        MatGetMultiProcBlock_MPIAIJ,
3169 /*124*/MatFindNonzeroRows_MPIAIJ,
3170        MatGetColumnNorms_MPIAIJ,
3171        MatInvertBlockDiagonal_MPIAIJ,
3172        0,
3173        MatGetSubMatricesParallel_MPIAIJ,
3174 /*129*/0,
3175        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3176        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3177        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3178        0,
3179 /*134*/0,
3180        0,
3181        0,
3182        0,
3183        0
3184 };
3185 
3186 /* ----------------------------------------------------------------------------------------*/
3187 
3188 EXTERN_C_BEGIN
3189 #undef __FUNCT__
3190 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3191 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3192 {
3193   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3194   PetscErrorCode ierr;
3195 
3196   PetscFunctionBegin;
3197   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3198   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3199   PetscFunctionReturn(0);
3200 }
3201 EXTERN_C_END
3202 
3203 EXTERN_C_BEGIN
3204 #undef __FUNCT__
3205 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3206 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3207 {
3208   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3209   PetscErrorCode ierr;
3210 
3211   PetscFunctionBegin;
3212   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3213   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3214   PetscFunctionReturn(0);
3215 }
3216 EXTERN_C_END
3217 
3218 EXTERN_C_BEGIN
3219 #undef __FUNCT__
3220 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3221 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3222 {
3223   Mat_MPIAIJ     *b;
3224   PetscErrorCode ierr;
3225   PetscInt       i;
3226   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3227 
3228   PetscFunctionBegin;
3229   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3230   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3231   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3232   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3233   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3234   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3235 
3236   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3237   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3238   if (d_nnz) {
3239     for (i=0; i<B->rmap->n; i++) {
3240       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3241     }
3242   }
3243   if (o_nnz) {
3244     for (i=0; i<B->rmap->n; i++) {
3245       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3246     }
3247   }
3248   b = (Mat_MPIAIJ*)B->data;
3249 
3250   if (!B->preallocated) {
3251     /* Explicitly create 2 MATSEQAIJ matrices. */
3252     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3253     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3254     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3255     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3256     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3257     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3258     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3259     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3260     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3261     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3262   }
3263 
3264   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3265   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3266   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3267   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3268   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3269   B->preallocated = PETSC_TRUE;
3270   PetscFunctionReturn(0);
3271 }
3272 EXTERN_C_END
3273 
3274 #undef __FUNCT__
3275 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3276 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3277 {
3278   Mat            mat;
3279   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3280   PetscErrorCode ierr;
3281 
3282   PetscFunctionBegin;
3283   *newmat       = 0;
3284   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3285   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3286   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3287   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3288   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3289   a    = (Mat_MPIAIJ*)mat->data;
3290 
3291   mat->factortype    = matin->factortype;
3292   mat->rmap->bs      = matin->rmap->bs;
3293   mat->cmap->bs      = matin->cmap->bs;
3294   mat->assembled    = PETSC_TRUE;
3295   mat->insertmode   = NOT_SET_VALUES;
3296   mat->preallocated = PETSC_TRUE;
3297 
3298   a->size           = oldmat->size;
3299   a->rank           = oldmat->rank;
3300   a->donotstash     = oldmat->donotstash;
3301   a->roworiented    = oldmat->roworiented;
3302   a->rowindices     = 0;
3303   a->rowvalues      = 0;
3304   a->getrowactive   = PETSC_FALSE;
3305 
3306   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3307   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3308 
3309   if (oldmat->colmap) {
3310 #if defined (PETSC_USE_CTABLE)
3311     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3312 #else
3313     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3314     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3315     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3316 #endif
3317   } else a->colmap = 0;
3318   if (oldmat->garray) {
3319     PetscInt len;
3320     len  = oldmat->B->cmap->n;
3321     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3322     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3323     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3324   } else a->garray = 0;
3325 
3326   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3327   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3328   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3329   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3330   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3331   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3332   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3333   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3334   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3335   *newmat = mat;
3336   PetscFunctionReturn(0);
3337 }
3338 
3339 
3340 
3341 #undef __FUNCT__
3342 #define __FUNCT__ "MatLoad_MPIAIJ"
3343 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3344 {
3345   PetscScalar    *vals,*svals;
3346   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3347   PetscErrorCode ierr;
3348   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3349   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3350   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3351   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3352   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3353   int            fd;
3354   PetscInt       bs = 1;
3355 
3356   PetscFunctionBegin;
3357   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3358   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3359   if (!rank) {
3360     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3361     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3362     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3363   }
3364 
3365   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3366   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3367   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3368 
3369   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3370 
3371   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3372   M = header[1]; N = header[2];
3373   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3374   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3375   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3376 
3377   /* If global sizes are set, check if they are consistent with that given in the file */
3378   if (sizesset) {
3379     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3380   }
3381   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3382   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3383 
3384   /* determine ownership of all (block) rows */
3385   if( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3386   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3387   else m = newMat->rmap->n; /* Set by user */
3388 
3389   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3390   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3391 
3392   /* First process needs enough room for process with most rows */
3393   if (!rank) {
3394     mmax       = rowners[1];
3395     for (i=2; i<=size; i++) {
3396       mmax = PetscMax(mmax,rowners[i]);
3397     }
3398   } else mmax = m;
3399 
3400   rowners[0] = 0;
3401   for (i=2; i<=size; i++) {
3402     rowners[i] += rowners[i-1];
3403   }
3404   rstart = rowners[rank];
3405   rend   = rowners[rank+1];
3406 
3407   /* distribute row lengths to all processors */
3408   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3409   if (!rank) {
3410     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3411     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3412     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3413     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3414     for (j=0; j<m; j++) {
3415       procsnz[0] += ourlens[j];
3416     }
3417     for (i=1; i<size; i++) {
3418       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3419       /* calculate the number of nonzeros on each processor */
3420       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3421         procsnz[i] += rowlengths[j];
3422       }
3423       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3424     }
3425     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3426   } else {
3427     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3428   }
3429 
3430   if (!rank) {
3431     /* determine max buffer needed and allocate it */
3432     maxnz = 0;
3433     for (i=0; i<size; i++) {
3434       maxnz = PetscMax(maxnz,procsnz[i]);
3435     }
3436     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3437 
3438     /* read in my part of the matrix column indices  */
3439     nz   = procsnz[0];
3440     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3441     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3442 
3443     /* read in everyone else's and ship it off */
3444     for (i=1; i<size; i++) {
3445       nz     = procsnz[i];
3446       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3447       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3448     }
3449     ierr = PetscFree(cols);CHKERRQ(ierr);
3450   } else {
3451     /* determine buffer space needed for message */
3452     nz = 0;
3453     for (i=0; i<m; i++) {
3454       nz += ourlens[i];
3455     }
3456     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3457 
3458     /* receive message of column indices*/
3459     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3460   }
3461 
3462   /* determine column ownership if matrix is not square */
3463   if (N != M) {
3464     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3465     else n = newMat->cmap->n;
3466     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3467     cstart = cend - n;
3468   } else {
3469     cstart = rstart;
3470     cend   = rend;
3471     n      = cend - cstart;
3472   }
3473 
3474   /* loop over local rows, determining number of off diagonal entries */
3475   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3476   jj = 0;
3477   for (i=0; i<m; i++) {
3478     for (j=0; j<ourlens[i]; j++) {
3479       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3480       jj++;
3481     }
3482   }
3483 
3484   for (i=0; i<m; i++) {
3485     ourlens[i] -= offlens[i];
3486   }
3487   if (!sizesset) {
3488     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3489   }
3490 
3491   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3492 
3493   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3494 
3495   for (i=0; i<m; i++) {
3496     ourlens[i] += offlens[i];
3497   }
3498 
3499   if (!rank) {
3500     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3501 
3502     /* read in my part of the matrix numerical values  */
3503     nz   = procsnz[0];
3504     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3505 
3506     /* insert into matrix */
3507     jj      = rstart;
3508     smycols = mycols;
3509     svals   = vals;
3510     for (i=0; i<m; i++) {
3511       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3512       smycols += ourlens[i];
3513       svals   += ourlens[i];
3514       jj++;
3515     }
3516 
3517     /* read in other processors and ship out */
3518     for (i=1; i<size; i++) {
3519       nz     = procsnz[i];
3520       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3521       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3522     }
3523     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3524   } else {
3525     /* receive numeric values */
3526     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3527 
3528     /* receive message of values*/
3529     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3530 
3531     /* insert into matrix */
3532     jj      = rstart;
3533     smycols = mycols;
3534     svals   = vals;
3535     for (i=0; i<m; i++) {
3536       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3537       smycols += ourlens[i];
3538       svals   += ourlens[i];
3539       jj++;
3540     }
3541   }
3542   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3543   ierr = PetscFree(vals);CHKERRQ(ierr);
3544   ierr = PetscFree(mycols);CHKERRQ(ierr);
3545   ierr = PetscFree(rowners);CHKERRQ(ierr);
3546   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3547   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3548 
3549   PetscFunctionReturn(0);
3550 }
3551 
3552 #undef __FUNCT__
3553 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3554 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3555 {
3556   PetscErrorCode ierr;
3557   IS             iscol_local;
3558   PetscInt       csize;
3559 
3560   PetscFunctionBegin;
3561   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3562   if (call == MAT_REUSE_MATRIX) {
3563     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3564     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3565   } else {
3566     PetscInt cbs;
3567     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3568     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3569     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3570   }
3571   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3572   if (call == MAT_INITIAL_MATRIX) {
3573     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3574     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3575   }
3576   PetscFunctionReturn(0);
3577 }
3578 
3579 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3580 #undef __FUNCT__
3581 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3582 /*
3583     Not great since it makes two copies of the submatrix: first a SeqAIJ
3584   matrix on each process, and then the final result by concatenating the
3585   local matrices.  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3586 
3587   Note: This requires a sequential iscol with all indices.
3588 */
3589 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3590 {
3591   PetscErrorCode ierr;
3592   PetscMPIInt    rank,size;
3593   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3594   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3595   PetscBool      allcolumns, colflag;
3596   Mat            M,Mreuse;
3597   MatScalar      *vwork,*aa;
3598   MPI_Comm       comm = ((PetscObject)mat)->comm;
3599   Mat_SeqAIJ     *aij;
3600 
3601 
3602   PetscFunctionBegin;
3603   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3604   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3605 
3606   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3607   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3608   if (colflag && ncol == mat->cmap->N){
3609     allcolumns = PETSC_TRUE;
3610   } else {
3611     allcolumns = PETSC_FALSE;
3612   }
3613   if (call ==  MAT_REUSE_MATRIX) {
3614     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3615     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3616     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3617   } else {
3618     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3619   }
3620 
3621   /*
3622       m - number of local rows
3623       n - number of columns (same on all processors)
3624       rstart - first row in new global matrix generated
3625   */
3626   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3627   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3628   if (call == MAT_INITIAL_MATRIX) {
3629     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3630     ii  = aij->i;
3631     jj  = aij->j;
3632 
3633     /*
3634         Determine the number of non-zeros in the diagonal and off-diagonal
3635         portions of the matrix in order to do correct preallocation
3636     */
3637 
3638     /* first get start and end of "diagonal" columns */
3639     if (csize == PETSC_DECIDE) {
3640       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3641       if (mglobal == n) { /* square matrix */
3642 	nlocal = m;
3643       } else {
3644         nlocal = n/size + ((n % size) > rank);
3645       }
3646     } else {
3647       nlocal = csize;
3648     }
3649     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3650     rstart = rend - nlocal;
3651     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3652 
3653     /* next, compute all the lengths */
3654     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3655     olens = dlens + m;
3656     for (i=0; i<m; i++) {
3657       jend = ii[i+1] - ii[i];
3658       olen = 0;
3659       dlen = 0;
3660       for (j=0; j<jend; j++) {
3661         if (*jj < rstart || *jj >= rend) olen++;
3662         else dlen++;
3663         jj++;
3664       }
3665       olens[i] = olen;
3666       dlens[i] = dlen;
3667     }
3668     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3669     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3670     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3671     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3672     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3673     ierr = PetscFree(dlens);CHKERRQ(ierr);
3674   } else {
3675     PetscInt ml,nl;
3676 
3677     M = *newmat;
3678     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3679     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3680     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3681     /*
3682          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3683        rather than the slower MatSetValues().
3684     */
3685     M->was_assembled = PETSC_TRUE;
3686     M->assembled     = PETSC_FALSE;
3687   }
3688   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3689   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3690   ii  = aij->i;
3691   jj  = aij->j;
3692   aa  = aij->a;
3693   for (i=0; i<m; i++) {
3694     row   = rstart + i;
3695     nz    = ii[i+1] - ii[i];
3696     cwork = jj;     jj += nz;
3697     vwork = aa;     aa += nz;
3698     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3699   }
3700 
3701   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3702   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3703   *newmat = M;
3704 
3705   /* save submatrix used in processor for next request */
3706   if (call ==  MAT_INITIAL_MATRIX) {
3707     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3708     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3709   }
3710 
3711   PetscFunctionReturn(0);
3712 }
3713 
3714 EXTERN_C_BEGIN
3715 #undef __FUNCT__
3716 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3717 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3718 {
3719   PetscInt       m,cstart, cend,j,nnz,i,d;
3720   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3721   const PetscInt *JJ;
3722   PetscScalar    *values;
3723   PetscErrorCode ierr;
3724 
3725   PetscFunctionBegin;
3726   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3727 
3728   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3729   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3730   m      = B->rmap->n;
3731   cstart = B->cmap->rstart;
3732   cend   = B->cmap->rend;
3733   rstart = B->rmap->rstart;
3734 
3735   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3736 
3737 #if defined(PETSC_USE_DEBUG)
3738   for (i=0; i<m; i++) {
3739     nnz     = Ii[i+1]- Ii[i];
3740     JJ      = J + Ii[i];
3741     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3742     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3743     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3744   }
3745 #endif
3746 
3747   for (i=0; i<m; i++) {
3748     nnz     = Ii[i+1]- Ii[i];
3749     JJ      = J + Ii[i];
3750     nnz_max = PetscMax(nnz_max,nnz);
3751     d       = 0;
3752     for (j=0; j<nnz; j++) {
3753       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3754     }
3755     d_nnz[i] = d;
3756     o_nnz[i] = nnz - d;
3757   }
3758   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3759   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3760 
3761   if (v) values = (PetscScalar*)v;
3762   else {
3763     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3764     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3765   }
3766 
3767   for (i=0; i<m; i++) {
3768     ii   = i + rstart;
3769     nnz  = Ii[i+1]- Ii[i];
3770     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3771   }
3772   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3773   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3774 
3775   if (!v) {
3776     ierr = PetscFree(values);CHKERRQ(ierr);
3777   }
3778   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3779   PetscFunctionReturn(0);
3780 }
3781 EXTERN_C_END
3782 
3783 #undef __FUNCT__
3784 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3785 /*@
3786    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3787    (the default parallel PETSc format).
3788 
3789    Collective on MPI_Comm
3790 
3791    Input Parameters:
3792 +  B - the matrix
3793 .  i - the indices into j for the start of each local row (starts with zero)
3794 .  j - the column indices for each local row (starts with zero)
3795 -  v - optional values in the matrix
3796 
3797    Level: developer
3798 
3799    Notes:
3800        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3801      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3802      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3803 
3804        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3805 
3806        The format which is used for the sparse matrix input is equivalent to a
3807     row-major ordering, i.e., for the following matrix, the input data expected is
3808     as shown below:
3809 
3810         1 0 0
3811         2 0 3     P0
3812        -------
3813         4 5 6     P1
3814 
3815      Process0 [P0]: rows_owned=[0,1]
3816         i =  {0,1,3}  [size = nrow+1  = 2+1]
3817         j =  {0,0,2}  [size = nz = 3]
3818         v =  {1,2,3}  [size = nz = 3]
3819 
3820      Process1 [P1]: rows_owned=[2]
3821         i =  {0,3}    [size = nrow+1  = 1+1]
3822         j =  {0,1,2}  [size = nz = 3]
3823         v =  {4,5,6}  [size = nz = 3]
3824 
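     For illustration only, a minimal sketch (for the 3x3 example above, assuming a two-process
     communicator with P0 owning rows 0-1 and P1 owning row 2) of how P0 might use this routine;
     the arrays are copied, so they may be freed afterwards:

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3};          /* P0 row pointers into j[] and v[]  */
     PetscInt    j[] = {0,0,2};          /* global column indices of P0 rows  */
     PetscScalar v[] = {1.0,2.0,3.0};    /* values of P0 rows                 */

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,2,PETSC_DECIDE,3,3);  /* P0 owns 2 of the 3 rows           */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(A,i,j,v);  /* preallocates, inserts, and assembles */
.ve
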
3825 .keywords: matrix, aij, compressed row, sparse, parallel
3826 
3827 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3828           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3829 @*/
3830 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3831 {
3832   PetscErrorCode ierr;
3833 
3834   PetscFunctionBegin;
3835   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 #undef __FUNCT__
3840 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3841 /*@C
3842    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3843    (the default parallel PETSc format).  For good matrix assembly performance
3844    the user should preallocate the matrix storage by setting the parameters
3845    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3846    performance can be increased by more than a factor of 50.
3847 
3848    Collective on MPI_Comm
3849 
3850    Input Parameters:
3851 +  A - the matrix
3852 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3853            (same value is used for all local rows)
3854 .  d_nnz - array containing the number of nonzeros in the various rows of the
3855            DIAGONAL portion of the local submatrix (possibly different for each row)
3856            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3857            The size of this array is equal to the number of local rows, i.e 'm'.
3858            For matrices that will be factored, you must leave room for (and set)
3859            the diagonal entry even if it is zero.
3860 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3861            submatrix (same value is used for all local rows).
3862 -  o_nnz - array containing the number of nonzeros in the various rows of the
3863            OFF-DIAGONAL portion of the local submatrix (possibly different for
3864            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3865            structure. The size of this array is equal to the number
3866            of local rows, i.e 'm'.
3867 
3868    If the *_nnz parameter is given then the *_nz parameter is ignored
3869 
3870    The AIJ format (also called the Yale sparse matrix format or
3871    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3872    storage.  The stored row and column indices begin with zero.
3873    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3874 
3875    The parallel matrix is partitioned such that the first m0 rows belong to
3876    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3877    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3878 
3879    The DIAGONAL portion of the local submatrix of a processor can be defined
3880    as the submatrix which is obtained by extracting the part corresponding to
3881    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3882    first row that belongs to the processor, r2 is the last row belonging to
3883    this processor, and c1-c2 is the range of indices of the local part of a
3884    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3885    common case of a square matrix, the row and column ranges are the same and
3886    the DIAGONAL part is also square. The remaining portion of the local
3887    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3888 
3889    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3890 
3891    You can call MatGetInfo() to get information on how effective the preallocation was;
3892    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3893    You can also run with the option -info and look for messages with the string
3894    malloc in them to see if additional memory allocation was needed.
3895 
3896    Example usage:
3897 
3898    Consider the following 8x8 matrix with 34 non-zero values that is
3899    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3900    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3901    as follows:
3902 
3903 .vb
3904             1  2  0  |  0  3  0  |  0  4
3905     Proc0   0  5  6  |  7  0  0  |  8  0
3906             9  0 10  | 11  0  0  | 12  0
3907     -------------------------------------
3908            13  0 14  | 15 16 17  |  0  0
3909     Proc1   0 18  0  | 19 20 21  |  0  0
3910             0  0  0  | 22 23  0  | 24  0
3911     -------------------------------------
3912     Proc2  25 26 27  |  0  0 28  | 29  0
3913            30  0  0  | 31 32 33  |  0 34
3914 .ve
3915 
3916    This can be represented as a collection of submatrices as:
3917 
3918 .vb
3919       A B C
3920       D E F
3921       G H I
3922 .ve
3923 
3924    Where the submatrices A,B,C are owned by proc0, D,E,F are
3925    owned by proc1, G,H,I are owned by proc2.
3926 
3927    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3928    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3929    The 'M','N' parameters are 8,8, and have the same values on all procs.
3930 
3931    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3932    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3933    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3934    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3935    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3936    matrix, and [DF] as another SeqAIJ matrix.
3937 
3938    When d_nz, o_nz parameters are specified, d_nz storage elements are
3939    allocated for every row of the local diagonal submatrix, and o_nz
3940    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3941    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3942    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3943    In this case, the values of d_nz,o_nz are:
3944 .vb
3945      proc0 : dnz = 2, o_nz = 2
3946      proc1 : dnz = 3, o_nz = 2
3947      proc2 : dnz = 1, o_nz = 4
3948 .ve
3949    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3950    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3951    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3952    34 values.
3953 
3954    When d_nnz, o_nnz parameters are specified, the storage is specified
3955    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3956    In the above case the values for d_nnz,o_nnz are:
3957 .vb
3958      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3959      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3960      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3961 .ve
3962    Here the space allocated is the sum of all the above values, i.e., 34, and
3963    hence the preallocation is perfect.
3964 
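   For illustration only, a minimal sketch (using the proc0 counts from the example above and
   assuming a three-process communicator) of how proc0 might preallocate its part of the 8x8
   matrix; proc1 and proc2 would pass their own local sizes and count arrays:

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};  /* proc0 per-row counts from above */

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);                         /* proc0 owns 3 rows and 3 columns */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual ... */
.ve
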
3965    Level: intermediate
3966 
3967 .keywords: matrix, aij, compressed row, sparse, parallel
3968 
3969 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3970           MPIAIJ, MatGetInfo()
3971 @*/
3972 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3973 {
3974   PetscErrorCode ierr;
3975 
3976   PetscFunctionBegin;
3977   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3978   PetscValidType(B,1);
3979   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3980   PetscFunctionReturn(0);
3981 }
3982 
3983 #undef __FUNCT__
3984 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3985 /*@
3986      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3987          CSR format the local rows.
3988 
3989    Collective on MPI_Comm
3990 
3991    Input Parameters:
3992 +  comm - MPI communicator
3993 .  m - number of local rows (Cannot be PETSC_DECIDE)
3994 .  n - This value should be the same as the local size used in creating the
3995        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3996        calculated if N is given) For square matrices n is almost always m.
3997 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3998 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3999 .   i - row indices
4000 .   j - column indices
4001 -   a - matrix values
4002 
4003    Output Parameter:
4004 .   mat - the matrix
4005 
4006    Level: intermediate
4007 
4008    Notes:
4009        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4010      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4011      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4012 
4013        The i and j indices are 0 based; the i indices are offsets into the local j (and a) arrays, not into any global array.
4014 
4015        The format used for the sparse matrix input is equivalent to a
4016     row-major ordering, i.e. for the following matrix, the input data expected is
4017     as shown:
4018 
4019         1 0 0
4020         2 0 3     P0
4021        -------
4022         4 5 6     P1
4023 
4024      Process0 [P0]: rows_owned=[0,1]
4025         i =  {0,1,3}  [size = nrow+1  = 2+1]
4026         j =  {0,0,2}  [size = nz = 3]
4027         v =  {1,2,3}  [size = nz = 3]
4028 
4029      Process1 [P1]: rows_owned=[2]
4030         i =  {0,3}    [size = nrow+1  = 1+1]
4031         j =  {0,1,2}  [size = nz = 3]
4032         v =  {4,5,6}  [size = nz = 3]
4033 
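   A minimal usage sketch for Process0 of the example above (the two-process communicator
   comm is assumed; error checking is omitted):
.vb
      Mat         A;
      PetscInt    i[] = {0,1,3},j[] = {0,0,2};
      PetscScalar v[] = {1,2,3};

      MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);   /* comm is assumed */
.ve
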
4034 .keywords: matrix, aij, compressed row, sparse, parallel
4035 
4036 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4037           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4038 @*/
4039 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4040 {
4041   PetscErrorCode ierr;
4042 
4043   PetscFunctionBegin;
4044   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4045   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4046   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4047   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4048   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4049   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4050   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4051   PetscFunctionReturn(0);
4052 }
4053 
4054 #undef __FUNCT__
4055 #define __FUNCT__ "MatCreateAIJ"
4056 /*@C
4057    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4058    (the default parallel PETSc format).  For good matrix assembly performance
4059    the user should preallocate the matrix storage by setting the parameters
4060    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4061    performance can be increased by more than a factor of 50.
4062 
4063    Collective on MPI_Comm
4064 
4065    Input Parameters:
4066 +  comm - MPI communicator
4067 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4068            This value should be the same as the local size used in creating the
4069            y vector for the matrix-vector product y = Ax.
4070 .  n - This value should be the same as the local size used in creating the
4071        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4072        calculated if N is given). For square matrices n is almost always m.
4073 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4074 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4075 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4076            (same value is used for all local rows)
4077 .  d_nnz - array containing the number of nonzeros in the various rows of the
4078            DIAGONAL portion of the local submatrix (possibly different for each row)
4079            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4080            The size of this array is equal to the number of local rows, i.e. 'm'.
4081 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4082            submatrix (same value is used for all local rows).
4083 -  o_nnz - array containing the number of nonzeros in the various rows of the
4084            OFF-DIAGONAL portion of the local submatrix (possibly different for
4085            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4086            structure. The size of this array is equal to the number
4087            of local rows, i.e. 'm'.
4088 
4089    Output Parameter:
4090 .  A - the matrix
4091 
4092    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4093    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4094    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4095 
4096    Notes:
4097    If the *_nnz parameter is given then the *_nz parameter is ignored
4098 
4099    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4100    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4101    storage requirements for this matrix.
4102 
4103    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4104    processor then it must be used on all processors that share the object for
4105    that argument.
4106 
4107    The user MUST specify either the local or global matrix dimensions
4108    (possibly both).
4109 
4110    The parallel matrix is partitioned across processors such that the
4111    first m0 rows belong to process 0, the next m1 rows belong to
4112    process 1, the next m2 rows belong to process 2, etc., where
4113    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4114    values corresponding to an [m x N] submatrix.
4115 
4116    The columns are logically partitioned with the n0 columns belonging
4117    to the 0th partition, the next n1 columns belonging to the next
4118    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4119 
4120    The DIAGONAL portion of the local submatrix on any given processor
4121    is the submatrix corresponding to the rows and columns m,n
4122    owned by that processor, i.e. the diagonal matrix on
4123    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4124    etc. The remaining portion of the local submatrix, of size [m x (N-n)],
4125    constitutes the OFF-DIAGONAL portion. The example below
4126    illustrates this concept.
4127 
4128    For a square global matrix we define each processor's diagonal portion
4129    to be its local rows and the corresponding columns (a square submatrix);
4130    each processor's off-diagonal portion encompasses the remainder of the
4131    local matrix (a rectangular submatrix).
4132 
4133    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4134 
4135    When calling this routine with a single process communicator, a matrix of
4136    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4137    type of communicator, use the construction mechanism:
4138      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4139 
4140    By default, this format uses inodes (identical nodes) when possible.
4141    We search for consecutive rows with the same nonzero structure, thereby
4142    reusing matrix information to achieve increased efficiency.
4143 
4144    Options Database Keys:
4145 +  -mat_no_inode  - Do not use inodes
4146 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4147 -  -mat_aij_oneindex - Internally use indexing starting at 1
4148         rather than 0.  Note that when calling MatSetValues(),
4149         the user still MUST index entries starting at 0!
4150 
4151 
4152    Example usage:
4153 
4154    Consider the following 8x8 matrix with 34 non-zero values, that is
4155    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4156    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4157    as follows:
4158 
4159 .vb
4160             1  2  0  |  0  3  0  |  0  4
4161     Proc0   0  5  6  |  7  0  0  |  8  0
4162             9  0 10  | 11  0  0  | 12  0
4163     -------------------------------------
4164            13  0 14  | 15 16 17  |  0  0
4165     Proc1   0 18  0  | 19 20 21  |  0  0
4166             0  0  0  | 22 23  0  | 24  0
4167     -------------------------------------
4168     Proc2  25 26 27  |  0  0 28  | 29  0
4169            30  0  0  | 31 32 33  |  0 34
4170 .ve
4171 
4172    This can be represented as a collection of submatrices as:
4173 
4174 .vb
4175       A B C
4176       D E F
4177       G H I
4178 .ve
4179 
4180    Where the submatrices A,B,C are owned by proc0, D,E,F are
4181    owned by proc1, G,H,I are owned by proc2.
4182 
4183    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4184    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4185    The 'M','N' parameters are 8,8, and have the same values on all procs.
4186 
4187    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4188    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4189    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4190    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4191    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4192    matrix, and [DF] as another SeqAIJ matrix.
4193 
4194    When d_nz, o_nz parameters are specified, d_nz storage elements are
4195    allocated for every row of the local diagonal submatrix, and o_nz
4196    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4197    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4198    all local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4199    In this case, the values of d_nz,o_nz are:
4200 .vb
4201      proc0 : dnz = 2, o_nz = 2
4202      proc1 : dnz = 3, o_nz = 2
4203      proc2 : dnz = 1, o_nz = 4
4204 .ve
4205    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4206    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4207    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4208    34 values.
4209 
4210    When d_nnz, o_nnz parameters are specified, the storage is specified
4211    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4212    In the above case the values for d_nnz,o_nnz are:
4213 .vb
4214      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4215      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4216      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4217 .ve
4218    Here the space allocated is the sum of all the above values, i.e. 34, and
4219    hence the preallocation is exact.
4220 
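   A minimal usage sketch of the call made on proc1 of the example above (the three-process
   communicator comm is assumed; error checking is omitted):
.vb
      Mat      A;
      PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};   /* proc1 row counts from the example */

      MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);  /* comm is an assumed 3-process communicator */
.ve
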
4221    Level: intermediate
4222 
4223 .keywords: matrix, aij, compressed row, sparse, parallel
4224 
4225 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4226           MPIAIJ, MatCreateMPIAIJWithArrays()
4227 @*/
4228 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4229 {
4230   PetscErrorCode ierr;
4231   PetscMPIInt    size;
4232 
4233   PetscFunctionBegin;
4234   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4235   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4236   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4237   if (size > 1) {
4238     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4239     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4240   } else {
4241     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4242     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4243   }
4244   PetscFunctionReturn(0);
4245 }
4246 
4247 #undef __FUNCT__
4248 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4249 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4250 {
4251   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4252 
4253   PetscFunctionBegin;
4254   *Ad     = a->A;
4255   *Ao     = a->B;
4256   *colmap = a->garray;
4257   PetscFunctionReturn(0);
4258 }
4259 
4260 #undef __FUNCT__
4261 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4262 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4263 {
4264   PetscErrorCode ierr;
4265   PetscInt       i;
4266   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4267 
4268   PetscFunctionBegin;
4269   if (coloring->ctype == IS_COLORING_GLOBAL) {
4270     ISColoringValue *allcolors,*colors;
4271     ISColoring      ocoloring;
4272 
4273     /* set coloring for diagonal portion */
4274     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4275 
4276     /* set coloring for off-diagonal portion */
4277     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4278     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4279     for (i=0; i<a->B->cmap->n; i++) {
4280       colors[i] = allcolors[a->garray[i]];
4281     }
4282     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4283     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4284     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4285     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4286   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4287     ISColoringValue *colors;
4288     PetscInt        *larray;
4289     ISColoring      ocoloring;
4290 
4291     /* set coloring for diagonal portion */
4292     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4293     for (i=0; i<a->A->cmap->n; i++) {
4294       larray[i] = i + A->cmap->rstart;
4295     }
4296     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4297     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4298     for (i=0; i<a->A->cmap->n; i++) {
4299       colors[i] = coloring->colors[larray[i]];
4300     }
4301     ierr = PetscFree(larray);CHKERRQ(ierr);
4302     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4303     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4304     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4305 
4306     /* set coloring for off-diagonal portion */
4307     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4308     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4309     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4310     for (i=0; i<a->B->cmap->n; i++) {
4311       colors[i] = coloring->colors[larray[i]];
4312     }
4313     ierr = PetscFree(larray);CHKERRQ(ierr);
4314     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4315     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4316     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4317   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4318 
4319   PetscFunctionReturn(0);
4320 }
4321 
4322 #if defined(PETSC_HAVE_ADIC)
4323 #undef __FUNCT__
4324 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4325 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4326 {
4327   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4328   PetscErrorCode ierr;
4329 
4330   PetscFunctionBegin;
4331   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4332   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4333   PetscFunctionReturn(0);
4334 }
4335 #endif
4336 
4337 #undef __FUNCT__
4338 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4339 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4340 {
4341   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4342   PetscErrorCode ierr;
4343 
4344   PetscFunctionBegin;
4345   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4346   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4347   PetscFunctionReturn(0);
4348 }
4349 
4350 #undef __FUNCT__
4351 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4352 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4353 {
4354   PetscErrorCode ierr;
4355   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4356   PetscInt       *indx;
4357 
4358   PetscFunctionBegin;
4359   /* This routine will ONLY return MPIAIJ type matrix */
4360   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4361   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4362   if (n == PETSC_DECIDE){
4363     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4364   }
4365   /* Check sum(n) = N */
4366   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4367   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4368 
4369   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4370   rstart -= m;
4371 
4372   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4373   for (i=0;i<m;i++) {
4374     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4375     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4376     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4377   }
4378 
4379   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4380   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4381   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4382   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4383   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4384   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4385   PetscFunctionReturn(0);
4386 }
4387 
4388 #undef __FUNCT__
4389 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4390 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4391 {
4392   PetscErrorCode ierr;
4393   PetscInt       m,N,i,rstart,nnz,Ii;
4394   PetscInt       *indx;
4395   PetscScalar    *values;
4396 
4397   PetscFunctionBegin;
4398   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4399   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4400   for (i=0;i<m;i++) {
4401     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4402     Ii    = i + rstart;
4403     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4404     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4405   }
4406   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4407   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4408   PetscFunctionReturn(0);
4409 }
4410 
4411 #undef __FUNCT__
4412 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4413 /*@
4414       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4415                  matrices from each processor
4416 
4417     Collective on MPI_Comm
4418 
4419    Input Parameters:
4420 +    comm - the communicator the parallel matrix will live on
4421 .    inmat - the input sequential matrix (one per processor)
4422 .    n - number of local columns (or PETSC_DECIDE)
4423 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4424 
4425    Output Parameter:
4426 .    outmat - the parallel matrix generated
4427 
4428     Level: advanced
4429 
4430    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4431 
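   A minimal usage sketch (seqA is an assumed, already assembled SeqAIJ matrix on each
   process, all with the same number of columns; error checking is omitted):
.vb
      Mat C;

      MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve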
4432 @*/
4433 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4434 {
4435   PetscErrorCode ierr;
4436 
4437   PetscFunctionBegin;
4438   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4439   if (scall == MAT_INITIAL_MATRIX){
4440     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4441   }
4442   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4443   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4444   PetscFunctionReturn(0);
4445 }
4446 
4447 #undef __FUNCT__
4448 #define __FUNCT__ "MatFileSplit"
4449 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4450 {
4451   PetscErrorCode    ierr;
4452   PetscMPIInt       rank;
4453   PetscInt          m,N,i,rstart,nnz;
4454   size_t            len;
4455   const PetscInt    *indx;
4456   PetscViewer       out;
4457   char              *name;
4458   Mat               B;
4459   const PetscScalar *values;
4460 
4461   PetscFunctionBegin;
4462   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4463   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4464   /* Should this be the type of the diagonal block of A? */
4465   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4466   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4467   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4468   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4469   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4470   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4471   for (i=0;i<m;i++) {
4472     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4473     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4474     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4475   }
4476   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4477   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4478 
4479   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4480   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4481   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4482   sprintf(name,"%s.%d",outfile,rank);
4483   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4484   ierr = PetscFree(name);CHKERRQ(ierr);
4485   ierr = MatView(B,out);CHKERRQ(ierr);
4486   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4487   ierr = MatDestroy(&B);CHKERRQ(ierr);
4488   PetscFunctionReturn(0);
4489 }
4490 
4491 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4492 #undef __FUNCT__
4493 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4494 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4495 {
4496   PetscErrorCode       ierr;
4497   Mat_Merge_SeqsToMPI  *merge;
4498   PetscContainer       container;
4499 
4500   PetscFunctionBegin;
4501   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4502   if (container) {
4503     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4504     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4505     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4506     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4507     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4508     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4509     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4510     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4511     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4512     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4513     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4514     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4515     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4516     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4517     ierr = PetscFree(merge);CHKERRQ(ierr);
4518     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4519   }
4520   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4521   PetscFunctionReturn(0);
4522 }
4523 
4524 #include <../src/mat/utils/freespace.h>
4525 #include <petscbt.h>
4526 
4527 #undef __FUNCT__
4528 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4529 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4530 {
4531   PetscErrorCode       ierr;
4532   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4533   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4534   PetscMPIInt          size,rank,taga,*len_s;
4535   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4536   PetscInt             proc,m;
4537   PetscInt             **buf_ri,**buf_rj;
4538   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4539   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4540   MPI_Request          *s_waits,*r_waits;
4541   MPI_Status           *status;
4542   MatScalar            *aa=a->a;
4543   MatScalar            **abuf_r,*ba_i;
4544   Mat_Merge_SeqsToMPI  *merge;
4545   PetscContainer       container;
4546 
4547   PetscFunctionBegin;
4548   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4549 
4550   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4551   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4552 
4553   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4554   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4555 
4556   bi     = merge->bi;
4557   bj     = merge->bj;
4558   buf_ri = merge->buf_ri;
4559   buf_rj = merge->buf_rj;
4560 
4561   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4562   owners = merge->rowmap->range;
4563   len_s  = merge->len_s;
4564 
4565   /* send and recv matrix values */
4566   /*-----------------------------*/
4567   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4568   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4569 
4570   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4571   for (proc=0,k=0; proc<size; proc++){
4572     if (!len_s[proc]) continue;
4573     i = owners[proc];
4574     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4575     k++;
4576   }
4577 
4578   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4579   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4580   ierr = PetscFree(status);CHKERRQ(ierr);
4581 
4582   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4583   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4584 
4585   /* insert mat values of mpimat */
4586   /*----------------------------*/
4587   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4588   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4589 
4590   for (k=0; k<merge->nrecv; k++){
4591     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4592     nrows = *(buf_ri_k[k]);
4593     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4594     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4595   }
4596 
4597   /* set values of ba */
4598   m = merge->rowmap->n;
4599   for (i=0; i<m; i++) {
4600     arow = owners[rank] + i;
4601     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4602     bnzi = bi[i+1] - bi[i];
4603     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4604 
4605     /* add local non-zero vals of this proc's seqmat into ba */
4606     anzi = ai[arow+1] - ai[arow];
4607     aj   = a->j + ai[arow];
4608     aa   = a->a + ai[arow];
4609     nextaj = 0;
4610     for (j=0; nextaj<anzi; j++){
4611       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4612         ba_i[j] += aa[nextaj++];
4613       }
4614     }
4615 
4616     /* add received vals into ba */
4617     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4618       /* i-th row */
4619       if (i == *nextrow[k]) {
4620         anzi = *(nextai[k]+1) - *nextai[k];
4621         aj   = buf_rj[k] + *(nextai[k]);
4622         aa   = abuf_r[k] + *(nextai[k]);
4623         nextaj = 0;
4624         for (j=0; nextaj<anzi; j++){
4625           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4626             ba_i[j] += aa[nextaj++];
4627           }
4628         }
4629         nextrow[k]++; nextai[k]++;
4630       }
4631     }
4632     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4633   }
4634   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4635   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4636 
4637   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4638   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4639   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4640   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4641   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4642   PetscFunctionReturn(0);
4643 }
4644 
4645 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4646 
4647 #undef __FUNCT__
4648 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4649 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4650 {
4651   PetscErrorCode       ierr;
4652   Mat                  B_mpi;
4653   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4654   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4655   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4656   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4657   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4658   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4659   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4660   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4661   MPI_Status           *status;
4662   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4663   PetscBT              lnkbt;
4664   Mat_Merge_SeqsToMPI  *merge;
4665   PetscContainer       container;
4666 
4667   PetscFunctionBegin;
4668   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4669 
4670   /* make sure it is a PETSc comm */
4671   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4672   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4673   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4674 
4675   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4676   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4677 
4678   /* determine row ownership */
4679   /*---------------------------------------------------------*/
4680   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4681   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4682   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4683   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4684   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4685   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4686   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4687 
4688   m      = merge->rowmap->n;
4689   M      = merge->rowmap->N;
4690   owners = merge->rowmap->range;
4691 
4692   /* determine the number of messages to send, their lengths */
4693   /*---------------------------------------------------------*/
4694   len_s  = merge->len_s;
4695 
4696   len = 0;  /* length of buf_si[] */
4697   merge->nsend = 0;
4698   for (proc=0; proc<size; proc++){
4699     len_si[proc] = 0;
4700     if (proc == rank){
4701       len_s[proc] = 0;
4702     } else {
4703       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4704       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4705     }
4706     if (len_s[proc]) {
4707       merge->nsend++;
4708       nrows = 0;
4709       for (i=owners[proc]; i<owners[proc+1]; i++){
4710         if (ai[i+1] > ai[i]) nrows++;
4711       }
4712       len_si[proc] = 2*(nrows+1);
4713       len += len_si[proc];
4714     }
4715   }
4716 
4717   /* determine the number and length of messages to receive for ij-structure */
4718   /*-------------------------------------------------------------------------*/
4719   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4720   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4721 
4722   /* post the Irecv of j-structure */
4723   /*-------------------------------*/
4724   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4725   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4726 
4727   /* post the Isend of j-structure */
4728   /*--------------------------------*/
4729   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4730 
4731   for (proc=0, k=0; proc<size; proc++){
4732     if (!len_s[proc]) continue;
4733     i = owners[proc];
4734     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4735     k++;
4736   }
4737 
4738   /* receives and sends of j-structure are complete */
4739   /*------------------------------------------------*/
4740   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4741   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4742 
4743   /* send and recv i-structure */
4744   /*---------------------------*/
4745   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4746   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4747 
4748   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4749   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4750   for (proc=0,k=0; proc<size; proc++){
4751     if (!len_s[proc]) continue;
4752     /* form outgoing message for i-structure:
4753          buf_si[0]:                 nrows to be sent
4754                [1:nrows]:           row index (global)
4755                [nrows+1:2*nrows+1]: i-structure index
4756     */
4757     /*-------------------------------------------*/
4758     nrows = len_si[proc]/2 - 1;
4759     buf_si_i    = buf_si + nrows+1;
4760     buf_si[0]   = nrows;
4761     buf_si_i[0] = 0;
4762     nrows = 0;
4763     for (i=owners[proc]; i<owners[proc+1]; i++){
4764       anzi = ai[i+1] - ai[i];
4765       if (anzi) {
4766         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4767         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4768         nrows++;
4769       }
4770     }
4771     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4772     k++;
4773     buf_si += len_si[proc];
4774   }
4775 
4776   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4777   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4778 
4779   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4780   for (i=0; i<merge->nrecv; i++){
4781     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4782   }
4783 
4784   ierr = PetscFree(len_si);CHKERRQ(ierr);
4785   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4786   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4787   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4788   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4789   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4790   ierr = PetscFree(status);CHKERRQ(ierr);
4791 
4792   /* compute a local seq matrix in each processor */
4793   /*----------------------------------------------*/
4794   /* allocate bi array and free space for accumulating nonzero column info */
4795   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4796   bi[0] = 0;
4797 
4798   /* create and initialize a linked list */
4799   nlnk = N+1;
4800   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4801 
4802   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4803   len = 0;
4804   len  = ai[owners[rank+1]] - ai[owners[rank]];
4805   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4806   current_space = free_space;
4807 
4808   /* determine symbolic info for each local row */
4809   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4810 
4811   for (k=0; k<merge->nrecv; k++){
4812     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4813     nrows = *buf_ri_k[k];
4814     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4815     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4816   }
4817 
4818   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4819   len = 0;
4820   for (i=0;i<m;i++) {
4821     bnzi   = 0;
4822     /* add local non-zero cols of this proc's seqmat into lnk */
4823     arow   = owners[rank] + i;
4824     anzi   = ai[arow+1] - ai[arow];
4825     aj     = a->j + ai[arow];
4826     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4827     bnzi += nlnk;
4828     /* add received col data into lnk */
4829     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4830       if (i == *nextrow[k]) { /* i-th row */
4831         anzi = *(nextai[k]+1) - *nextai[k];
4832         aj   = buf_rj[k] + *nextai[k];
4833         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4834         bnzi += nlnk;
4835         nextrow[k]++; nextai[k]++;
4836       }
4837     }
4838     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4839 
4840     /* if free space is not available, make more free space */
4841     if (current_space->local_remaining<bnzi) {
4842       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4843       nspacedouble++;
4844     }
4845     /* copy data into free space, then initialize lnk */
4846     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4847     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4848 
4849     current_space->array           += bnzi;
4850     current_space->local_used      += bnzi;
4851     current_space->local_remaining -= bnzi;
4852 
4853     bi[i+1] = bi[i] + bnzi;
4854   }
4855 
4856   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4857 
4858   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4859   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4860   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4861 
4862   /* create symbolic parallel matrix B_mpi */
4863   /*---------------------------------------*/
4864   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4865   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4866   if (n==PETSC_DECIDE) {
4867     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4868   } else {
4869     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4870   }
4871   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4872   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4873   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4874   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4875   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4876 
4877   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4878   B_mpi->assembled     = PETSC_FALSE;
4879   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4880   merge->bi            = bi;
4881   merge->bj            = bj;
4882   merge->buf_ri        = buf_ri;
4883   merge->buf_rj        = buf_rj;
4884   merge->coi           = PETSC_NULL;
4885   merge->coj           = PETSC_NULL;
4886   merge->owners_co     = PETSC_NULL;
4887 
4888   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4889 
4890   /* attach the supporting struct to B_mpi for reuse */
4891   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4892   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4893   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4894   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4895   *mpimat = B_mpi;
4896 
4897   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4898   PetscFunctionReturn(0);
4899 }
4900 
4901 #undef __FUNCT__
4902 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4903 /*@C
4904       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4905                  matrices from each processor
4906 
4907     Collective on MPI_Comm
4908 
4909    Input Parameters:
4910 +    comm - the communicator the parallel matrix will live on
4911 .    seqmat - the input sequential matrix (one per processor)
4912 .    m - number of local rows (or PETSC_DECIDE)
4913 .    n - number of local columns (or PETSC_DECIDE)
4914 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4915 
4916    Output Parameter:
4917 .    mpimat - the parallel matrix generated
4918 
4919     Level: advanced
4920 
4921    Notes:
4922      The dimensions of the sequential matrix in each processor MUST be the same.
4923      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4924      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
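
   A minimal usage sketch (seqmat is an assumed, already assembled SeqAIJ matrix with the
   full global dimensions on each process; error checking is omitted):
.vb
      Mat C;

      MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve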
4925 @*/
4926 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4927 {
4928   PetscErrorCode   ierr;
4929   PetscMPIInt     size;
4930 
4931   PetscFunctionBegin;
4932   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4933   if (size == 1){
4934      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4935      if (scall == MAT_INITIAL_MATRIX){
4936        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4937      } else {
4938        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4939      }
4940      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4941      PetscFunctionReturn(0);
4942   }
4943   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4944   if (scall == MAT_INITIAL_MATRIX){
4945     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4946   }
4947   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4948   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4949   PetscFunctionReturn(0);
4950 }
4951 
4952 #undef __FUNCT__
4953 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4954 /*@
4955      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4956           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4957           with MatGetSize()
4958 
4959     Not Collective
4960 
4961    Input Parameters:
4962 +    A - the matrix
4963 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4964 
4965    Output Parameter:
4966 .    A_loc - the local sequential matrix generated
4967 
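   A minimal usage sketch (A is an assumed, already assembled MPIAIJ matrix; error checking
   is omitted):
.vb
      Mat A_loc;

      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);  /* A is assumed to be MPIAIJ */
      /* ... use the local rows of A through A_loc ... */
      MatDestroy(&A_loc);
.ve
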
4968     Level: developer
4969 
4970 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4971 
4972 @*/
4973 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4974 {
4975   PetscErrorCode  ierr;
4976   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4977   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4978   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4979   MatScalar       *aa=a->a,*ba=b->a,*cam;
4980   PetscScalar     *ca;
4981   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4982   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4983   PetscBool       match;
4984 
4985   PetscFunctionBegin;
4986   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4987   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4988   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4989   if (scall == MAT_INITIAL_MATRIX){
4990     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4991     ci[0] = 0;
4992     for (i=0; i<am; i++){
4993       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4994     }
4995     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4996     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4997     k = 0;
4998     for (i=0; i<am; i++) {
4999       ncols_o = bi[i+1] - bi[i];
5000       ncols_d = ai[i+1] - ai[i];
5001       /* off-diagonal portion of A */
5002       for (jo=0; jo<ncols_o; jo++) {
5003         col = cmap[*bj];
5004         if (col >= cstart) break;
5005         cj[k]   = col; bj++;
5006         ca[k++] = *ba++;
5007       }
5008       /* diagonal portion of A */
5009       for (j=0; j<ncols_d; j++) {
5010         cj[k]   = cstart + *aj++;
5011         ca[k++] = *aa++;
5012       }
5013       /* off-diagonal portion of A */
5014       for (j=jo; j<ncols_o; j++) {
5015         cj[k]   = cmap[*bj++];
5016         ca[k++] = *ba++;
5017       }
5018     }
5019     /* put together the new matrix */
5020     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5021     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5022     /* Since these are PETSc arrays, change flags to free them as necessary. */
5023     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5024     mat->free_a  = PETSC_TRUE;
5025     mat->free_ij = PETSC_TRUE;
5026     mat->nonew   = 0;
5027   } else if (scall == MAT_REUSE_MATRIX){
5028     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5029     ci = mat->i; cj = mat->j; cam = mat->a;
5030     for (i=0; i<am; i++) {
5031       /* off-diagonal portion of A */
5032       ncols_o = bi[i+1] - bi[i];
5033       for (jo=0; jo<ncols_o; jo++) {
5034         col = cmap[*bj];
5035         if (col >= cstart) break;
5036         *cam++ = *ba++; bj++;
5037       }
5038       /* diagonal portion of A */
5039       ncols_d = ai[i+1] - ai[i];
5040       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5041       /* off-diagonal portion of A */
5042       for (j=jo; j<ncols_o; j++) {
5043         *cam++ = *ba++; bj++;
5044       }
5045     }
5046   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5047   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5048   PetscFunctionReturn(0);
5049 }
5050 
5051 #undef __FUNCT__
5052 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5053 /*@C
5054      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5055 
5056     Not Collective
5057 
5058    Input Parameters:
5059 +    A - the matrix
5060 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5061 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5062 
5063    Output Parameter:
5064 .    A_loc - the local sequential matrix generated
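   A minimal usage sketch, taking all local rows and all nonzero columns (A is an assumed,
   already assembled MPIAIJ matrix; error checking is omitted):
.vb
      Mat A_loc;

      MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve
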
5065 
5066     Level: developer
5067 
5068 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5069 
5070 @*/
5071 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5072 {
5073   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5074   PetscErrorCode    ierr;
5075   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5076   IS                isrowa,iscola;
5077   Mat               *aloc;
5078   PetscBool       match;
5079 
5080   PetscFunctionBegin;
5081   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5082   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5083   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5084   if (!row){
5085     start = A->rmap->rstart; end = A->rmap->rend;
5086     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5087   } else {
5088     isrowa = *row;
5089   }
5090   if (!col){
5091     start = A->cmap->rstart;
5092     cmap  = a->garray;
5093     nzA   = a->A->cmap->n;
5094     nzB   = a->B->cmap->n;
5095     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5096     ncols = 0;
5097     for (i=0; i<nzB; i++) {
5098       if (cmap[i] < start) idx[ncols++] = cmap[i];
5099       else break;
5100     }
5101     imark = i;
5102     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5103     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5104     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5105   } else {
5106     iscola = *col;
5107   }
5108   if (scall != MAT_INITIAL_MATRIX){
5109     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5110     aloc[0] = *A_loc;
5111   }
5112   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5113   *A_loc = aloc[0];
5114   ierr = PetscFree(aloc);CHKERRQ(ierr);
5115   if (!row){
5116     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5117   }
5118   if (!col){
5119     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5120   }
5121   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5122   PetscFunctionReturn(0);
5123 }
5124 
5125 #undef __FUNCT__
5126 #define __FUNCT__ "MatGetBrowsOfAcols"
5127 /*@C
5128    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5129 
5130     Collective on Mat
5131 
5132    Input Parameters:
5133 +    A,B - the matrices in mpiaij format
5134 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5135 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5136 
5137    Output Parameter:
5138 +    rowb, colb - index sets of rows and columns of B to extract
5139 -    B_seq - the sequential matrix generated
5140 
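   A minimal usage sketch (A and B are assumed, already assembled MPIAIJ matrices; the index
   sets returned in rowb and colb can be passed again with MAT_REUSE_MATRIX; error checking
   is omitted):
.vb
      Mat B_seq;
      IS  rowb,colb;

      MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
.ve
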
5141     Level: developer
5142 
5143 @*/
5144 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5145 {
5146   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5147   PetscErrorCode    ierr;
5148   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5149   IS                isrowb,iscolb;
5150   Mat               *bseq=PETSC_NULL;
5151 
5152   PetscFunctionBegin;
5153   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5154     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5155   }
5156   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5157 
5158   if (scall == MAT_INITIAL_MATRIX){
5159     start = A->cmap->rstart;
5160     cmap  = a->garray;
5161     nzA   = a->A->cmap->n;
5162     nzB   = a->B->cmap->n;
5163     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5164     ncols = 0;
5165     for (i=0; i<nzB; i++) {  /* row < local row index */
5166       if (cmap[i] < start) idx[ncols++] = cmap[i];
5167       else break;
5168     }
5169     imark = i;
5170     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5171     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5172     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5173     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5174   } else {
5175     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5176     isrowb = *rowb; iscolb = *colb;
5177     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5178     bseq[0] = *B_seq;
5179   }
5180   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5181   *B_seq = bseq[0];
5182   ierr = PetscFree(bseq);CHKERRQ(ierr);
5183   if (!rowb){
5184     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5185   } else {
5186     *rowb = isrowb;
5187   }
5188   if (!colb){
5189     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5190   } else {
5191     *colb = iscolb;
5192   }
5193   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5194   PetscFunctionReturn(0);
5195 }
5196 
5197 #undef __FUNCT__
5198 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5199 /*
5200     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5201     of the OFF-DIAGONAL portion of local A
5202 
5203     Collective on Mat
5204 
5205    Input Parameters:
5206 +    A,B - the matrices in mpiaij format
5207 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5208 
5209    Output Parameter:
5210 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5211 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5212 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5213 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5214 
5215     Level: developer
5216 
5217 */
5218 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5219 {
5220   VecScatter_MPI_General *gen_to,*gen_from;
5221   PetscErrorCode         ierr;
5222   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5223   Mat_SeqAIJ             *b_oth;
5224   VecScatter             ctx=a->Mvctx;
5225   MPI_Comm               comm=((PetscObject)ctx)->comm;
5226   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5227   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5228   PetscScalar            *rvalues,*svalues;
5229   MatScalar              *b_otha,*bufa,*bufA;
5230   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5231   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5232   MPI_Status             *sstatus,rstatus;
5233   PetscMPIInt            jj;
5234   PetscInt               *cols,sbs,rbs;
5235   PetscScalar            *vals;
5236 
5237   PetscFunctionBegin;
5238   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5239     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5240   }
5241   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5242   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5243 
5244   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5245   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5246   rvalues  = gen_from->values; /* holds the length of receiving row */
5247   svalues  = gen_to->values;   /* holds the length of sending row */
5248   nrecvs   = gen_from->n;
5249   nsends   = gen_to->n;
5250 
5251   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5252   srow     = gen_to->indices;   /* local row index to be sent */
5253   sstarts  = gen_to->starts;
5254   sprocs   = gen_to->procs;
5255   sstatus  = gen_to->sstatus;
5256   sbs      = gen_to->bs;
5257   rstarts  = gen_from->starts;
5258   rprocs   = gen_from->procs;
5259   rbs      = gen_from->bs;
5260 
5261   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5262   if (scall == MAT_INITIAL_MATRIX){
5263     /* i-array */
5264     /*---------*/
5265     /*  post receives */
5266     for (i=0; i<nrecvs; i++){
5267       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5268       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5269       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5270     }
5271 
5272     /* pack the outgoing message */
5273     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5274     sstartsj[0] = 0;  rstartsj[0] = 0;
5275     len = 0; /* total length of j or a array to be sent */
5276     k = 0;
5277     for (i=0; i<nsends; i++){
5278       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5279       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5280       for (j=0; j<nrows; j++) {
5281         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5282         for (l=0; l<sbs; l++){
5283           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5284           rowlen[j*sbs+l] = ncols;
5285           len += ncols;
5286           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5287         }
5288         k++;
5289       }
5290       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5291       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5292     }
5293     /* recvs and sends of i-array are completed */
5294     i = nrecvs;
5295     while (i--) {
5296       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5297     }
5298     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5299 
5300     /* allocate buffers for sending j and a arrays */
5301     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5302     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5303 
5304     /* create i-array of B_oth */
5305     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5306     b_othi[0] = 0;
5307     len = 0; /* total length of j or a array to be received */
5308     k = 0;
5309     for (i=0; i<nrecvs; i++){
5310       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5311       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5312       for (j=0; j<nrows; j++) {
5313         b_othi[k+1] = b_othi[k] + rowlen[j];
5314         len += rowlen[j]; k++;
5315       }
5316       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5317     }
5318 
5319     /* allocate space for j and a arrays of B_oth */
5320     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5321     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5322 
5323     /* j-array */
5324     /*---------*/
5325     /*  post receives of j-array */
5326     for (i=0; i<nrecvs; i++){
5327       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5328       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5329     }
5330 
5331     /* pack the outgoing message j-array */
5332     k = 0;
5333     for (i=0; i<nsends; i++){
5334       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5335       bufJ = bufj+sstartsj[i];
5336       for (j=0; j<nrows; j++) {
5337         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5338         for (ll=0; ll<sbs; ll++){
5339           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5340           for (l=0; l<ncols; l++){
5341             *bufJ++ = cols[l];
5342           }
5343           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5344         }
5345       }
5346       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5347     }
5348 
5349     /* recvs and sends of j-array are completed */
5350     i = nrecvs;
5351     while (i--) {
5352       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5353     }
5354     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5355   } else if (scall == MAT_REUSE_MATRIX){
5356     sstartsj = *startsj_s;
5357     rstartsj = *startsj_r;
5358     bufa     = *bufa_ptr;
5359     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5360     b_otha   = b_oth->a;
5361   } else {
5362     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"scall must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5363   }
5364 
5365   /* a-array */
5366   /*---------*/
5367   /*  post receives of a-array */
5368   for (i=0; i<nrecvs; i++){
5369     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5370     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5371   }
5372 
5373   /* pack the outgoing message a-array */
5374   k = 0;
5375   for (i=0; i<nsends; i++){
5376     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5377     bufA = bufa+sstartsj[i];
5378     for (j=0; j<nrows; j++) {
5379       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5380       for (ll=0; ll<sbs; ll++){
5381         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5382         for (l=0; l<ncols; l++){
5383           *bufA++ = vals[l];
5384         }
5385         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5386       }
5387     }
5388     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5389   }
5390   /* recvs and sends of a-array are completed */
5391   i = nrecvs;
5392   while (i--) {
5393     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5394   }
5395   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5396   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5397 
5398   if (scall == MAT_INITIAL_MATRIX){
5399     /* put together the new matrix */
5400     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5401 
5402     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5403     /* Since these are PETSc arrays, change flags to free them as necessary. */
5404     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5405     b_oth->free_a  = PETSC_TRUE;
5406     b_oth->free_ij = PETSC_TRUE;
5407     b_oth->nonew   = 0;
5408 
5409     ierr = PetscFree(bufj);CHKERRQ(ierr);
5410     if (!startsj_s || !bufa_ptr){
5411       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5412       ierr = PetscFree(bufa);CHKERRQ(ierr); /* the send buffer is not returned to the caller, so free it here */
5413     } else {
5414       *startsj_s = sstartsj;
5415       *startsj_r = rstartsj;
5416       *bufa_ptr  = bufa;
5417     }
5418   }
5419   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5420   PetscFunctionReturn(0);
5421 }
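/*
   Usage sketch (illustrative only, not taken from the PETSc sources): a caller that wants to
   reuse the communication metadata passes non-NULL startsj_s, startsj_r and bufa_ptr on the
   first (MAT_INITIAL_MATRIX) call and hands the same pointers back on later MAT_REUSE_MATRIX
   calls.  A and B are assumed to be assembled MPIAIJ matrices with A->cmap matching B->rmap,
   and the reuse call assumes the nonzero pattern of B has not changed.

     PetscInt  *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     Mat       B_oth;

     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
        ... numerical values of B change, nonzero pattern stays the same ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);

     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
*/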
5422 
5423 #undef __FUNCT__
5424 #define __FUNCT__ "MatGetCommunicationStructs"
5425 /*@C
5426   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5427 
5428   Not Collective
5429 
5430   Input Parameters:
5431 . A - The matrix in mpiaij format
5432 
5433   Output Parameters:
5434 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5435 . colmap - A map from global column index to local index into lvec
5436 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5437 
5438   Level: developer
5439 
5440 @*/
5441 #if defined (PETSC_USE_CTABLE)
5442 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5443 #else
5444 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5445 #endif
5446 {
5447   Mat_MPIAIJ *a;
5448 
5449   PetscFunctionBegin;
5450   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5451   PetscValidPointer(lvec, 2);
5452   PetscValidPointer(colmap, 3);
5453   PetscValidPointer(multScatter, 4);
5454   a = (Mat_MPIAIJ *) A->data;
5455   if (lvec) *lvec = a->lvec;
5456   if (colmap) *colmap = a->colmap;
5457   if (multScatter) *multScatter = a->Mvctx;
5458   PetscFunctionReturn(0);
5459 }
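/*
   Usage sketch (illustrative only, assuming a PETSc build without PETSC_USE_CTABLE so that
   colmap is returned as a PetscInt array, an assembled MATMPIAIJ matrix A, and a global
   vector x with a layout compatible with A's columns):

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

   The returned objects are owned by the matrix and must not be destroyed by the caller.
*/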
5460 
5461 EXTERN_C_BEGIN
5462 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5463 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5464 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5465 EXTERN_C_END
5466 
5467 #undef __FUNCT__
5468 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5469 /*
5470     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5471 
5472                n                       p                          p
5473         (              )       (              )         (                  )
5474       m (      A       )  *  n (       B      )   =   m (         C        )
5475         (              )       (              )         (                  )
5476 
5477 */
5478 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5479 {
5480   PetscErrorCode     ierr;
5481   Mat                At,Bt,Ct;
5482 
5483   PetscFunctionBegin;
5484   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5485   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5486   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5487   ierr = MatDestroy(&At);CHKERRQ(ierr);
5488   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5489   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5490   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5491   PetscFunctionReturn(0);
5492 }
5493 
5494 #undef __FUNCT__
5495 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5496 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5497 {
5498   PetscErrorCode ierr;
5499   PetscInt       m=A->rmap->n,n=B->cmap->n;
5500   Mat            Cmat;
5501 
5502   PetscFunctionBegin;
5503   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5504   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5505   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5506   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5507   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5508   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5509   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5510   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5511   *C   = Cmat;
5512   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5513   PetscFunctionReturn(0);
5514 }
5515 
5516 /* ----------------------------------------------------------------*/
5517 #undef __FUNCT__
5518 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5519 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5520 {
5521   PetscErrorCode ierr;
5522 
5523   PetscFunctionBegin;
5524   if (scall == MAT_INITIAL_MATRIX){
5525     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5526   }
5527   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5528   PetscFunctionReturn(0);
5529 }
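/*
   Usage sketch (illustrative only): this routine is normally reached through the generic
   MatMatMult() interface with A of type MATMPIDENSE and B of type MATMPIAIJ:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);

   Note that the numeric phase above recreates the transposes At and Bt on every call, so a
   MAT_REUSE_MATRIX call reuses only the dense product C, not the intermediate matrices.
*/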
5530 
5531 EXTERN_C_BEGIN
5532 #if defined(PETSC_HAVE_MUMPS)
5533 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5534 #endif
5535 #if defined(PETSC_HAVE_PASTIX)
5536 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5537 #endif
5538 #if defined(PETSC_HAVE_SUPERLU_DIST)
5539 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5540 #endif
5541 #if defined(PETSC_HAVE_SPOOLES)
5542 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5543 #endif
5544 #if defined(PETSC_HAVE_CLIQUE)
5545 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5546 #endif
5547 EXTERN_C_END
5548 
5549 /*MC
5550    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5551 
5552    Options Database Keys:
5553 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5554 
5555   Level: beginner
5556 
5557 .seealso: MatCreateAIJ()
5558 M*/
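/*
   Usage sketch (illustrative only): creating a parallel AIJ matrix explicitly and
   preallocating it; the d_nz/o_nz values of 5 and 2 are placeholders chosen for the example.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/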
5559 
5560 EXTERN_C_BEGIN
5561 #undef __FUNCT__
5562 #define __FUNCT__ "MatCreate_MPIAIJ"
5563 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5564 {
5565   Mat_MPIAIJ     *b;
5566   PetscErrorCode ierr;
5567   PetscMPIInt    size;
5568 
5569   PetscFunctionBegin;
5570   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5571   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5572   B->data         = (void*)b;
5573   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5574   B->assembled    = PETSC_FALSE;
5575   B->insertmode   = NOT_SET_VALUES;
5576   b->size         = size;
5577   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5578 
5579   /* build cache for off-processor entries */
5580   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5581   b->donotstash  = PETSC_FALSE;
5582   b->colmap      = 0;
5583   b->garray      = 0;
5584   b->roworiented = PETSC_TRUE;
5585 
5586   /* stuff used for matrix vector multiply */
5587   b->lvec      = PETSC_NULL;
5588   b->Mvctx     = PETSC_NULL;
5589 
5590   /* stuff for MatGetRow() */
5591   b->rowindices   = 0;
5592   b->rowvalues    = 0;
5593   b->getrowactive = PETSC_FALSE;
5594 
5595   /* flexible pointer used in CUSP/CUSPARSE classes */
5596   b->spptr        = PETSC_NULL;
5597 #if defined(PETSC_HAVE_SPOOLES)
5598   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5599                                      "MatGetFactor_mpiaij_spooles",
5600                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5601 #endif
5602 #if defined(PETSC_HAVE_MUMPS)
5603   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5604                                      "MatGetFactor_aij_mumps",
5605                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5606 #endif
5607 #if defined(PETSC_HAVE_PASTIX)
5608   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5609 					   "MatGetFactor_mpiaij_pastix",
5610 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5611 #endif
5612 #if defined(PETSC_HAVE_SUPERLU_DIST)
5613   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5614                                      "MatGetFactor_mpiaij_superlu_dist",
5615                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5616 #endif
5617 #if defined(PETSC_HAVE_CLIQUE)
5618   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5619                                      "MatGetFactor_aij_clique",
5620                                      MatGetFactor_aij_clique);CHKERRQ(ierr);
5621 #endif
5622   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5623                                      "MatStoreValues_MPIAIJ",
5624                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5625   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5626                                      "MatRetrieveValues_MPIAIJ",
5627                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5628   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5629 				     "MatGetDiagonalBlock_MPIAIJ",
5630                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5631   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5632 				     "MatIsTranspose_MPIAIJ",
5633 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5634   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5635 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5636 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5637   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5638 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5639 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5640   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5641 				     "MatDiagonalScaleLocal_MPIAIJ",
5642 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5643   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5644                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5645                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5646   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5647                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5648                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5649   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5650                                      "MatConvert_MPIAIJ_MPISBAIJ",
5651                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5652   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5653                                      "MatMatMult_MPIDense_MPIAIJ",
5654                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5655   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5656                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5657                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5658   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5659                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5660                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5661   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5662   PetscFunctionReturn(0);
5663 }
5664 EXTERN_C_END
5665 
5666 #undef __FUNCT__
5667 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5668 /*@
5669      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5670          and "off-diagonal" part of the matrix in CSR format.
5671 
5672    Collective on MPI_Comm
5673 
5674    Input Parameters:
5675 +  comm - MPI communicator
5676 .  m - number of local rows (Cannot be PETSC_DECIDE)
5677 .  n - This value should be the same as the local size used in creating the
5678        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5679        calculated if N is given). For square matrices n is almost always m.
5680 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5681 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5682 .   i - row indices for "diagonal" portion of matrix
5683 .   j - column indices
5684 .   a - matrix values
5685 .   oi - row indices for "off-diagonal" portion of matrix
5686 .   oj - column indices
5687 -   oa - matrix values
5688 
5689    Output Parameter:
5690 .   mat - the matrix
5691 
5692    Level: advanced
5693 
5694    Notes:
5695        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5696        must free the arrays once the matrix has been destroyed and not before.
5697 
5698        The i and j indices are 0 based
5699 
5700        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5701 
5702        This sets local rows and cannot be used to set off-processor values.
5703 
5704        You cannot later use MatSetValues() to change values in this matrix.
5705 
5706 .keywords: matrix, aij, compressed row, sparse, parallel
5707 
5708 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5709           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5710 @*/
5711 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5712 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5713 {
5714   PetscErrorCode ierr;
5715   Mat_MPIAIJ     *maij;
5716 
5717   PetscFunctionBegin;
5718   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5719   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5720   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5721   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5722   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5723   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5724   maij = (Mat_MPIAIJ*) (*mat)->data;
5725   maij->donotstash     = PETSC_TRUE;
5726   (*mat)->preallocated = PETSC_TRUE;
5727 
5728   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5729   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5730 
5731   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5732   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5733 
5734   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5735   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5736   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5737   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5738 
5739   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5740   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5741   PetscFunctionReturn(0);
5742 }
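/*
   Usage sketch (illustrative only): on each process the caller supplies the "diagonal" block
   in the CSR arrays i, j, a and the "off-diagonal" block in oi, oj, oa, and the arrays must
   stay alive until the matrix is destroyed.  The values below are placeholders for a 2x2
   local diagonal block on every rank with no off-process coupling.

     PetscInt    i[]  = {0,1,2},  j[]  = {0,1};
     PetscScalar a[]  = {1.0,2.0};
     PetscInt    oi[] = {0,0,0},  oj[] = {0};
     PetscScalar oa[] = {0.0};
     Mat         A;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   Since oi is identically zero here, the off-diagonal block is empty and oj/oa are dummies.
*/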
5743 
5744 /*
5745     Special version for direct calls from Fortran
5746 */
5747 #include <petsc-private/fortranimpl.h>
5748 
5749 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5750 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5751 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5752 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5753 #endif
5754 
5755 /* Change these macros so that they can be used in a void function */
5756 #undef CHKERRQ
5757 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5758 #undef SETERRQ2
5759 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5760 #undef SETERRQ3
5761 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5762 #undef SETERRQ
5763 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5764 
5765 EXTERN_C_BEGIN
5766 #undef __FUNCT__
5767 #define __FUNCT__ "matsetvaluesmpiaij_"
5768 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5769 {
5770   Mat             mat = *mmat;
5771   PetscInt        m = *mm, n = *mn;
5772   InsertMode      addv = *maddv;
5773   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5774   PetscScalar     value;
5775   PetscErrorCode  ierr;
5776 
5777   MatCheckPreallocated(mat,1);
5778   if (mat->insertmode == NOT_SET_VALUES) {
5779     mat->insertmode = addv;
5780   }
5781 #if defined(PETSC_USE_DEBUG)
5782   else if (mat->insertmode != addv) {
5783     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5784   }
5785 #endif
5786   {
5787   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5788   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5789   PetscBool       roworiented = aij->roworiented;
5790 
5791   /* Some Variables required in the macro */
5792   Mat             A = aij->A;
5793   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5794   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5795   MatScalar       *aa = a->a;
5796   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5797   Mat             B = aij->B;
5798   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5799   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5800   MatScalar       *ba = b->a;
5801 
5802   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5803   PetscInt        nonew = a->nonew;
5804   MatScalar       *ap1,*ap2;
5805 
5806   PetscFunctionBegin;
5807   for (i=0; i<m; i++) {
5808     if (im[i] < 0) continue;
5809 #if defined(PETSC_USE_DEBUG)
5810     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5811 #endif
5812     if (im[i] >= rstart && im[i] < rend) {
5813       row      = im[i] - rstart;
5814       lastcol1 = -1;
5815       rp1      = aj + ai[row];
5816       ap1      = aa + ai[row];
5817       rmax1    = aimax[row];
5818       nrow1    = ailen[row];
5819       low1     = 0;
5820       high1    = nrow1;
5821       lastcol2 = -1;
5822       rp2      = bj + bi[row];
5823       ap2      = ba + bi[row];
5824       rmax2    = bimax[row];
5825       nrow2    = bilen[row];
5826       low2     = 0;
5827       high2    = nrow2;
5828 
5829       for (j=0; j<n; j++) {
5830         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5831         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5832         if (in[j] >= cstart && in[j] < cend){
5833           col = in[j] - cstart;
5834           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5835         } else if (in[j] < 0) continue;
5836 #if defined(PETSC_USE_DEBUG)
5837         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5838 #endif
5839         else {
5840           if (mat->was_assembled) {
5841             if (!aij->colmap) {
5842               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5843             }
5844 #if defined (PETSC_USE_CTABLE)
5845             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5846 	    col--;
5847 #else
5848             col = aij->colmap[in[j]] - 1;
5849 #endif
5850             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5851               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5852               col =  in[j];
5853               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5854               B = aij->B;
5855               b = (Mat_SeqAIJ*)B->data;
5856               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5857               rp2      = bj + bi[row];
5858               ap2      = ba + bi[row];
5859               rmax2    = bimax[row];
5860               nrow2    = bilen[row];
5861               low2     = 0;
5862               high2    = nrow2;
5863               bm       = aij->B->rmap->n;
5864               ba = b->a;
5865             }
5866           } else col = in[j];
5867           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5868         }
5869       }
5870     } else {
5871       if (!aij->donotstash) {
5872         if (roworiented) {
5873           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5874         } else {
5875           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5876         }
5877       }
5878     }
5879   }}
5880   PetscFunctionReturnVoid();
5881 }
5882 EXTERN_C_END
5883 
5884