xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 5a9b6599d0ff68ab5f0ec64c3d9dcd7c90db1bfb)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 
178     Only for square matrices
179 */
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
204     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
205     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
206     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
207     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
208     rowners[0] = 0;
209     for (i=2; i<=size; i++) {
210       rowners[i] += rowners[i-1];
211     }
212     rstart = rowners[rank];
213     rend   = rowners[rank+1];
214     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
215     if (!rank) {
216       gmata = (Mat_SeqAIJ*) gmat->data;
217       /* send row lengths to all processors */
218       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
219       for (i=1; i<size; i++) {
220 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222       /* determine number diagonal and off-diagonal counts */
223       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
224       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
225       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
226       jj = 0;
227       for (i=0; i<m; i++) {
228 	for (j=0; j<dlens[i]; j++) {
229           if (gmata->j[jj] < rstart) ld[i]++;
230 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
231 	  jj++;
232 	}
233       }
234       /* send column indices to other processes */
235       for (i=1; i<size; i++) {
236 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
237 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
238 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
239       }
240 
241       /* send numerical values to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
245       }
246       gmataa = gmata->a;
247       gmataj = gmata->j;
248 
249     } else {
250       /* receive row lengths */
251       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
252       /* receive column indices */
253       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
254       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
255       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       /* determine number diagonal and off-diagonal counts */
257       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
258       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
259       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
260       jj = 0;
261       for (i=0; i<m; i++) {
262 	for (j=0; j<dlens[i]; j++) {
263           if (gmataj[jj] < rstart) ld[i]++;
264 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
265 	  jj++;
266 	}
267       }
268       /* receive numerical values */
269       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
270       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
271     }
272     /* set preallocation */
273     for (i=0; i<m; i++) {
274       dlens[i] -= olens[i];
275     }
276     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
277     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
278 
279     for (i=0; i<m; i++) {
280       dlens[i] += olens[i];
281     }
282     cnt  = 0;
283     for (i=0; i<m; i++) {
284       row  = rstart + i;
285       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
286       cnt += dlens[i];
287     }
288     if (rank) {
289       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
290     }
291     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
292     ierr = PetscFree(rowners);CHKERRQ(ierr);
293     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
294     *inmat = mat;
295   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
296     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
297     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
298     mat   = *inmat;
299     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
300     if (!rank) {
301       /* send numerical values to other processes */
302       gmata = (Mat_SeqAIJ*) gmat->data;
303       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
304       gmataa = gmata->a;
305       for (i=1; i<size; i++) {
306         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
307         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
308       }
309       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
310     } else {
311       /* receive numerical values from process 0*/
312       nz   = Ad->nz + Ao->nz;
313       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
314       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
315     }
316     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
317     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
318     ad = Ad->a;
319     ao = Ao->a;
320     if (mat->rmap->n) {
321       i  = 0;
322       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
323       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
324     }
325     for (i=1; i<mat->rmap->n; i++) {
326       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
327       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
328     }
329     i--;
330     if (mat->rmap->n) {
331       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332     }
333     if (rank) {
334       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
335     }
336   }
337   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
338   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
339   CHKMEMQ;
340   PetscFunctionReturn(0);
341 }
342 
343 /*
344   Local utility routine that creates a mapping from the global column
345 number to the local number in the off-diagonal part of the local
346 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
347 a slightly higher hash table cost; without it it is not scalable (each processor
348 has an order N integer array but is fast to acess.
349 */
350 #undef __FUNCT__
351 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
352 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
353 {
354   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
355   PetscErrorCode ierr;
356   PetscInt       n = aij->B->cmap->n,i;
357 
358   PetscFunctionBegin;
359   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
360 #if defined (PETSC_USE_CTABLE)
361   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
362   for (i=0; i<n; i++){
363     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
364   }
365 #else
366   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
367   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
368   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
369   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
370 #endif
371   PetscFunctionReturn(0);
372 }
373 
374 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
375 { \
376     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
377     lastcol1 = col;\
378     while (high1-low1 > 5) { \
379       t = (low1+high1)/2; \
380       if (rp1[t] > col) high1 = t; \
381       else             low1  = t; \
382     } \
383       for (_i=low1; _i<high1; _i++) { \
384         if (rp1[_i] > col) break; \
385         if (rp1[_i] == col) { \
386           if (addv == ADD_VALUES) ap1[_i] += value;   \
387           else                    ap1[_i] = value; \
388           goto a_noinsert; \
389         } \
390       }  \
391       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
392       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
393       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
394       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
395       N = nrow1++ - 1; a->nz++; high1++; \
396       /* shift up all the later entries in this row */ \
397       for (ii=N; ii>=_i; ii--) { \
398         rp1[ii+1] = rp1[ii]; \
399         ap1[ii+1] = ap1[ii]; \
400       } \
401       rp1[_i] = col;  \
402       ap1[_i] = value;  \
403       a_noinsert: ; \
404       ailen[row] = nrow1; \
405 }
406 
407 
408 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
409 { \
410     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
411     lastcol2 = col;\
412     while (high2-low2 > 5) { \
413       t = (low2+high2)/2; \
414       if (rp2[t] > col) high2 = t; \
415       else             low2  = t; \
416     } \
417     for (_i=low2; _i<high2; _i++) {		\
418       if (rp2[_i] > col) break;			\
419       if (rp2[_i] == col) {			      \
420 	if (addv == ADD_VALUES) ap2[_i] += value;     \
421 	else                    ap2[_i] = value;      \
422 	goto b_noinsert;			      \
423       }						      \
424     }							      \
425     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
426     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
427     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
428     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
429     N = nrow2++ - 1; b->nz++; high2++;					\
430     /* shift up all the later entries in this row */			\
431     for (ii=N; ii>=_i; ii--) {						\
432       rp2[ii+1] = rp2[ii];						\
433       ap2[ii+1] = ap2[ii];						\
434     }									\
435     rp2[_i] = col;							\
436     ap2[_i] = value;							\
437     b_noinsert: ;								\
438     bilen[row] = nrow2;							\
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
443 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
444 {
445   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
446   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
447   PetscErrorCode ierr;
448   PetscInt       l,*garray = mat->garray,diag;
449 
450   PetscFunctionBegin;
451   /* code only works for square matrices A */
452 
453   /* find size of row to the left of the diagonal part */
454   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
455   row  = row - diag;
456   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
457     if (garray[b->j[b->i[row]+l]] > diag) break;
458   }
459   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
460 
461   /* diagonal part */
462   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
463 
464   /* right of diagonal part */
465   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
466   PetscFunctionReturn(0);
467 }
468 
469 #undef __FUNCT__
470 #define __FUNCT__ "MatSetValues_MPIAIJ"
471 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
472 {
473   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
474   PetscScalar    value;
475   PetscErrorCode ierr;
476   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
477   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
478   PetscBool      roworiented = aij->roworiented;
479 
480   /* Some Variables required in the macro */
481   Mat            A = aij->A;
482   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
483   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
484   MatScalar      *aa = a->a;
485   PetscBool      ignorezeroentries = a->ignorezeroentries;
486   Mat            B = aij->B;
487   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
488   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
489   MatScalar      *ba = b->a;
490 
491   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
492   PetscInt       nonew;
493   MatScalar      *ap1,*ap2;
494 
495   PetscFunctionBegin;
496   if (v) PetscValidScalarPointer(v,6);
497   for (i=0; i<m; i++) {
498     if (im[i] < 0) continue;
499 #if defined(PETSC_USE_DEBUG)
500     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
501 #endif
502     if (im[i] >= rstart && im[i] < rend) {
503       row      = im[i] - rstart;
504       lastcol1 = -1;
505       rp1      = aj + ai[row];
506       ap1      = aa + ai[row];
507       rmax1    = aimax[row];
508       nrow1    = ailen[row];
509       low1     = 0;
510       high1    = nrow1;
511       lastcol2 = -1;
512       rp2      = bj + bi[row];
513       ap2      = ba + bi[row];
514       rmax2    = bimax[row];
515       nrow2    = bilen[row];
516       low2     = 0;
517       high2    = nrow2;
518 
519       for (j=0; j<n; j++) {
520         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
521         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
522         if (in[j] >= cstart && in[j] < cend){
523           col = in[j] - cstart;
524           nonew = a->nonew;
525           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
526         } else if (in[j] < 0) continue;
527 #if defined(PETSC_USE_DEBUG)
528         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
529 #endif
530         else {
531           if (mat->was_assembled) {
532             if (!aij->colmap) {
533               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
534             }
535 #if defined (PETSC_USE_CTABLE)
536             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
537 	    col--;
538 #else
539             col = aij->colmap[in[j]] - 1;
540 #endif
541             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
542               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
543               col =  in[j];
544               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
545               B = aij->B;
546               b = (Mat_SeqAIJ*)B->data;
547               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
548               rp2      = bj + bi[row];
549               ap2      = ba + bi[row];
550               rmax2    = bimax[row];
551               nrow2    = bilen[row];
552               low2     = 0;
553               high2    = nrow2;
554               bm       = aij->B->rmap->n;
555               ba = b->a;
556             }
557           } else col = in[j];
558           nonew = b->nonew;
559           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
560         }
561       }
562     } else {
563       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
564       if (!aij->donotstash) {
565         mat->assembled = PETSC_FALSE;
566         if (roworiented) {
567           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
568         } else {
569           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
570         }
571       }
572     }
573   }
574   PetscFunctionReturn(0);
575 }
576 
577 #undef __FUNCT__
578 #define __FUNCT__ "MatGetValues_MPIAIJ"
579 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
580 {
581   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
582   PetscErrorCode ierr;
583   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
584   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
585 
586   PetscFunctionBegin;
587   for (i=0; i<m; i++) {
588     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
589     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
590     if (idxm[i] >= rstart && idxm[i] < rend) {
591       row = idxm[i] - rstart;
592       for (j=0; j<n; j++) {
593         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
594         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
595         if (idxn[j] >= cstart && idxn[j] < cend){
596           col = idxn[j] - cstart;
597           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
598         } else {
599           if (!aij->colmap) {
600             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
601           }
602 #if defined (PETSC_USE_CTABLE)
603           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
604           col --;
605 #else
606           col = aij->colmap[idxn[j]] - 1;
607 #endif
608           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
609           else {
610             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611           }
612         }
613       }
614     } else {
615       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
616     }
617   }
618   PetscFunctionReturn(0);
619 }
620 
621 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
622 
623 #undef __FUNCT__
624 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
625 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
626 {
627   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
628   PetscErrorCode ierr;
629   PetscInt       nstash,reallocs;
630   InsertMode     addv;
631 
632   PetscFunctionBegin;
633   if (aij->donotstash || mat->nooffprocentries) {
634     PetscFunctionReturn(0);
635   }
636 
637   /* make sure all processors are either in INSERTMODE or ADDMODE */
638   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
639   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
640   mat->insertmode = addv; /* in case this processor had no cache */
641 
642   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
643   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
644   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
645   PetscFunctionReturn(0);
646 }
647 
648 #undef __FUNCT__
649 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
650 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
651 {
652   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
653   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
654   PetscErrorCode ierr;
655   PetscMPIInt    n;
656   PetscInt       i,j,rstart,ncols,flg;
657   PetscInt       *row,*col;
658   PetscBool      other_disassembled;
659   PetscScalar    *val;
660   InsertMode     addv = mat->insertmode;
661 
662   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
663   PetscFunctionBegin;
664   if (!aij->donotstash && !mat->nooffprocentries) {
665     while (1) {
666       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
667       if (!flg) break;
668 
669       for (i=0; i<n;) {
670         /* Now identify the consecutive vals belonging to the same row */
671         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
672         if (j < n) ncols = j-i;
673         else       ncols = n-i;
674         /* Now assemble all these values with a single function call */
675         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
676         i = j;
677       }
678     }
679     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
680   }
681   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
682   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
683 
684   /* determine if any processor has disassembled, if so we must
685      also disassemble ourselfs, in order that we may reassemble. */
686   /*
687      if nonzero structure of submatrix B cannot change then we know that
688      no processor disassembled thus we can skip this stuff
689   */
690   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
691     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
692     if (mat->was_assembled && !other_disassembled) {
693       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
694     }
695   }
696   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
697     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
698   }
699   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
700   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
701   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
702   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
703 
704   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
705   aij->rowvalues = 0;
706 
707   /* used by MatAXPY() */
708   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
709   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
710 
711   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
712   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
718 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
719 {
720   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
721   PetscErrorCode ierr;
722 
723   PetscFunctionBegin;
724   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
725   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatZeroRows_MPIAIJ"
731 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
732 {
733   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
734   PetscErrorCode    ierr;
735   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
736   PetscInt          i,*owners = A->rmap->range;
737   PetscInt          *nprocs,j,idx,nsends,row;
738   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
739   PetscInt          *rvalues,count,base,slen,*source;
740   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
741   MPI_Comm          comm = ((PetscObject)A)->comm;
742   MPI_Request       *send_waits,*recv_waits;
743   MPI_Status        recv_status,*send_status;
744   const PetscScalar *xx;
745   PetscScalar       *bb;
746 #if defined(PETSC_DEBUG)
747   PetscBool      found = PETSC_FALSE;
748 #endif
749 
750   PetscFunctionBegin;
751   /*  first count number of contributors to each processor */
752   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
753   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
754   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
755   j = 0;
756   for (i=0; i<N; i++) {
757     if (lastidx > (idx = rows[i])) j = 0;
758     lastidx = idx;
759     for (; j<size; j++) {
760       if (idx >= owners[j] && idx < owners[j+1]) {
761         nprocs[2*j]++;
762         nprocs[2*j+1] = 1;
763         owner[i] = j;
764 #if defined(PETSC_DEBUG)
765         found = PETSC_TRUE;
766 #endif
767         break;
768       }
769     }
770 #if defined(PETSC_DEBUG)
771     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
772     found = PETSC_FALSE;
773 #endif
774   }
775   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
776 
777   if (A->nooffproczerorows) {
778     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
779     nrecvs = nsends;
780     nmax   = N;
781   } else {
782     /* inform other processors of number of messages and max length*/
783     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
784   }
785 
786   /* post receives:   */
787   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
788   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
789   for (i=0; i<nrecvs; i++) {
790     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
791   }
792 
793   /* do sends:
794       1) starts[i] gives the starting index in svalues for stuff going to
795          the ith processor
796   */
797   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
798   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
799   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
800   starts[0] = 0;
801   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
802   for (i=0; i<N; i++) {
803     svalues[starts[owner[i]]++] = rows[i];
804   }
805 
806   starts[0] = 0;
807   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
808   count = 0;
809   for (i=0; i<size; i++) {
810     if (nprocs[2*i+1]) {
811       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
812     }
813   }
814   ierr = PetscFree(starts);CHKERRQ(ierr);
815 
816   base = owners[rank];
817 
818   /*  wait on receives */
819   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
820   count  = nrecvs; slen = 0;
821   while (count) {
822     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
823     /* unpack receives into our local space */
824     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
825     source[imdex]  = recv_status.MPI_SOURCE;
826     lens[imdex]    = n;
827     slen          += n;
828     count--;
829   }
830   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
831 
832   /* move the data into the send scatter */
833   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
834   count = 0;
835   for (i=0; i<nrecvs; i++) {
836     values = rvalues + i*nmax;
837     for (j=0; j<lens[i]; j++) {
838       lrows[count++] = values[j] - base;
839     }
840   }
841   ierr = PetscFree(rvalues);CHKERRQ(ierr);
842   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
843   ierr = PetscFree(owner);CHKERRQ(ierr);
844   ierr = PetscFree(nprocs);CHKERRQ(ierr);
845 
846   /* fix right hand side if needed */
847   if (x && b) {
848     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
849     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
850     for (i=0; i<slen; i++) {
851       bb[lrows[i]] = diag*xx[lrows[i]];
852     }
853     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
854     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
855   }
856   /*
857         Zero the required rows. If the "diagonal block" of the matrix
858      is square and the user wishes to set the diagonal we use separate
859      code so that MatSetValues() is not called for each diagonal allocating
860      new memory, thus calling lots of mallocs and slowing things down.
861 
862   */
863   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
864   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
865   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
866     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
867   } else if (diag != 0.0) {
868     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
869     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
870       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
871 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
872     }
873     for (i = 0; i < slen; i++) {
874       row  = lrows[i] + rstart;
875       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
876     }
877     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
878     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
879   } else {
880     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
881   }
882   ierr = PetscFree(lrows);CHKERRQ(ierr);
883 
884   /* wait on sends */
885   if (nsends) {
886     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
887     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
888     ierr = PetscFree(send_status);CHKERRQ(ierr);
889   }
890   ierr = PetscFree(send_waits);CHKERRQ(ierr);
891   ierr = PetscFree(svalues);CHKERRQ(ierr);
892   PetscFunctionReturn(0);
893 }
894 
895 #undef __FUNCT__
896 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
897 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
898 {
899   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
900   PetscErrorCode    ierr;
901   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
902   PetscInt          i,*owners = A->rmap->range;
903   PetscInt          *nprocs,j,idx,nsends;
904   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
905   PetscInt          *rvalues,count,base,slen,*source;
906   PetscInt          *lens,*lrows,*values,m;
907   MPI_Comm          comm = ((PetscObject)A)->comm;
908   MPI_Request       *send_waits,*recv_waits;
909   MPI_Status        recv_status,*send_status;
910   const PetscScalar *xx;
911   PetscScalar       *bb,*mask;
912   Vec               xmask,lmask;
913   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
914   const PetscInt    *aj, *ii,*ridx;
915   PetscScalar       *aa;
916 #if defined(PETSC_DEBUG)
917   PetscBool         found = PETSC_FALSE;
918 #endif
919 
920   PetscFunctionBegin;
921   /*  first count number of contributors to each processor */
922   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
923   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
924   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
925   j = 0;
926   for (i=0; i<N; i++) {
927     if (lastidx > (idx = rows[i])) j = 0;
928     lastidx = idx;
929     for (; j<size; j++) {
930       if (idx >= owners[j] && idx < owners[j+1]) {
931         nprocs[2*j]++;
932         nprocs[2*j+1] = 1;
933         owner[i] = j;
934 #if defined(PETSC_DEBUG)
935         found = PETSC_TRUE;
936 #endif
937         break;
938       }
939     }
940 #if defined(PETSC_DEBUG)
941     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
942     found = PETSC_FALSE;
943 #endif
944   }
945   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
946 
947   /* inform other processors of number of messages and max length*/
948   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
949 
950   /* post receives:   */
951   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
952   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
953   for (i=0; i<nrecvs; i++) {
954     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
955   }
956 
957   /* do sends:
958       1) starts[i] gives the starting index in svalues for stuff going to
959          the ith processor
960   */
961   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
963   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
964   starts[0] = 0;
965   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
966   for (i=0; i<N; i++) {
967     svalues[starts[owner[i]]++] = rows[i];
968   }
969 
970   starts[0] = 0;
971   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
972   count = 0;
973   for (i=0; i<size; i++) {
974     if (nprocs[2*i+1]) {
975       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
976     }
977   }
978   ierr = PetscFree(starts);CHKERRQ(ierr);
979 
980   base = owners[rank];
981 
982   /*  wait on receives */
983   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
984   count  = nrecvs; slen = 0;
985   while (count) {
986     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
987     /* unpack receives into our local space */
988     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
989     source[imdex]  = recv_status.MPI_SOURCE;
990     lens[imdex]    = n;
991     slen          += n;
992     count--;
993   }
994   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
995 
996   /* move the data into the send scatter */
997   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
998   count = 0;
999   for (i=0; i<nrecvs; i++) {
1000     values = rvalues + i*nmax;
1001     for (j=0; j<lens[i]; j++) {
1002       lrows[count++] = values[j] - base;
1003     }
1004   }
1005   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1006   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1007   ierr = PetscFree(owner);CHKERRQ(ierr);
1008   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1009   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1010 
1011   /* zero diagonal part of matrix */
1012   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1013 
1014   /* handle off diagonal part of matrix */
1015   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1016   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1017   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1018   for (i=0; i<slen; i++) {
1019     bb[lrows[i]] = 1;
1020   }
1021   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1022   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1024   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1025   if (x) {
1026     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1027     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1029     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1030   }
1031   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1032 
1033   /* remove zeroed rows of off diagonal matrix */
1034   ii = aij->i;
1035   for (i=0; i<slen; i++) {
1036     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1037   }
1038 
1039   /* loop over all elements of off process part of matrix zeroing removed columns*/
1040   if (aij->compressedrow.use){
1041     m    = aij->compressedrow.nrows;
1042     ii   = aij->compressedrow.i;
1043     ridx = aij->compressedrow.rindex;
1044     for (i=0; i<m; i++){
1045       n   = ii[i+1] - ii[i];
1046       aj  = aij->j + ii[i];
1047       aa  = aij->a + ii[i];
1048 
1049       for (j=0; j<n; j++) {
1050         if (PetscAbsScalar(mask[*aj])) {
1051           if (b) bb[*ridx] -= *aa*xx[*aj];
1052           *aa        = 0.0;
1053         }
1054         aa++;
1055         aj++;
1056       }
1057       ridx++;
1058     }
1059   } else { /* do not use compressed row format */
1060     m = l->B->rmap->n;
1061     for (i=0; i<m; i++) {
1062       n   = ii[i+1] - ii[i];
1063       aj  = aij->j + ii[i];
1064       aa  = aij->a + ii[i];
1065       for (j=0; j<n; j++) {
1066         if (PetscAbsScalar(mask[*aj])) {
1067           if (b) bb[i] -= *aa*xx[*aj];
1068           *aa    = 0.0;
1069         }
1070         aa++;
1071         aj++;
1072       }
1073     }
1074   }
1075   if (x) {
1076     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1077     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1078   }
1079   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1080   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1081   ierr = PetscFree(lrows);CHKERRQ(ierr);
1082 
1083   /* wait on sends */
1084   if (nsends) {
1085     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1086     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1087     ierr = PetscFree(send_status);CHKERRQ(ierr);
1088   }
1089   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1090   ierr = PetscFree(svalues);CHKERRQ(ierr);
1091 
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatMult_MPIAIJ"
1097 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1098 {
1099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1100   PetscErrorCode ierr;
1101   PetscInt       nt;
1102 
1103   PetscFunctionBegin;
1104   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1105   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1106   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1107   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1108   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1109   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1115 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1116 {
1117   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1118   PetscErrorCode ierr;
1119 
1120   PetscFunctionBegin;
1121   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1127 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1128 {
1129   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1130   PetscErrorCode ierr;
1131 
1132   PetscFunctionBegin;
1133   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1134   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1135   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1136   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1142 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1143 {
1144   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1145   PetscErrorCode ierr;
1146   PetscBool      merged;
1147 
1148   PetscFunctionBegin;
1149   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1150   /* do nondiagonal part */
1151   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1152   if (!merged) {
1153     /* send it on its way */
1154     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1155     /* do local part */
1156     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1157     /* receive remote parts: note this assumes the values are not actually */
1158     /* added in yy until the next line, */
1159     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160   } else {
1161     /* do local part */
1162     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1163     /* send it on its way */
1164     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165     /* values actually were received in the Begin() but we need to call this nop */
1166     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1167   }
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 EXTERN_C_BEGIN
1172 #undef __FUNCT__
1173 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1174 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1175 {
1176   MPI_Comm       comm;
1177   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1178   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1179   IS             Me,Notme;
1180   PetscErrorCode ierr;
1181   PetscInt       M,N,first,last,*notme,i;
1182   PetscMPIInt    size;
1183 
1184   PetscFunctionBegin;
1185 
1186   /* Easy test: symmetric diagonal block */
1187   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1188   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1189   if (!*f) PetscFunctionReturn(0);
1190   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1191   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1192   if (size == 1) PetscFunctionReturn(0);
1193 
1194   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1195   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1196   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1197   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1198   for (i=0; i<first; i++) notme[i] = i;
1199   for (i=last; i<M; i++) notme[i-last+first] = i;
1200   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1201   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1202   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1203   Aoff = Aoffs[0];
1204   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1205   Boff = Boffs[0];
1206   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1207   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1208   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1209   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1210   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1211   ierr = PetscFree(notme);CHKERRQ(ierr);
1212    PetscFunctionReturn(0);
1213 }
1214 EXTERN_C_END
1215 
1216 #undef __FUNCT__
1217 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1218 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1219 {
1220   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1221   PetscErrorCode ierr;
1222 
1223   PetscFunctionBegin;
1224   /* do nondiagonal part */
1225   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1226   /* send it on its way */
1227   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1228   /* do local part */
1229   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1230   /* receive remote parts */
1231   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 /*
1236   This only works correctly for square matrices where the subblock A->A is the
1237    diagonal block
1238 */
1239 #undef __FUNCT__
1240 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1241 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1242 {
1243   PetscErrorCode ierr;
1244   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1245 
1246   PetscFunctionBegin;
1247   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1248   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1249   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatScale_MPIAIJ"
1255 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1256 {
1257   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1262   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatDestroy_MPIAIJ"
1268 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1269 {
1270   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274 #if defined(PETSC_USE_LOG)
1275   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1276 #endif
1277   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1278   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1279   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1280   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1281 #if defined (PETSC_USE_CTABLE)
1282   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1283 #else
1284   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1285 #endif
1286   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1287   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1288   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1289   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1290   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1291   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1292 
1293   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1307 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1308 {
1309   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1310   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1311   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1312   PetscErrorCode    ierr;
1313   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1314   int               fd;
1315   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1316   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1317   PetscScalar       *column_values;
1318   PetscInt          message_count,flowcontrolcount;
1319 
1320   PetscFunctionBegin;
1321   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1322   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1323   nz   = A->nz + B->nz;
1324   if (!rank) {
1325     header[0] = MAT_FILE_CLASSID;
1326     header[1] = mat->rmap->N;
1327     header[2] = mat->cmap->N;
1328     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1329     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1330     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1331     /* get largest number of rows any processor has */
1332     rlen = mat->rmap->n;
1333     range = mat->rmap->range;
1334     for (i=1; i<size; i++) {
1335       rlen = PetscMax(rlen,range[i+1] - range[i]);
1336     }
1337   } else {
1338     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1339     rlen = mat->rmap->n;
1340   }
1341 
1342   /* load up the local row counts */
1343   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1344   for (i=0; i<mat->rmap->n; i++) {
1345     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1346   }
1347 
1348   /* store the row lengths to the file */
1349   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1350   if (!rank) {
1351     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     for (i=1; i<size; i++) {
1353       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1354       rlen = range[i+1] - range[i];
1355       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1356       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1357     }
1358     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1359   } else {
1360     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1361     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1362     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1363   }
1364   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1365 
1366   /* load up the local column indices */
1367   nzmax = nz; /* )th processor needs space a largest processor needs */
1368   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1369   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1370   cnt  = 0;
1371   for (i=0; i<mat->rmap->n; i++) {
1372     for (j=B->i[i]; j<B->i[i+1]; j++) {
1373       if ( (col = garray[B->j[j]]) > cstart) break;
1374       column_indices[cnt++] = col;
1375     }
1376     for (k=A->i[i]; k<A->i[i+1]; k++) {
1377       column_indices[cnt++] = A->j[k] + cstart;
1378     }
1379     for (; j<B->i[i+1]; j++) {
1380       column_indices[cnt++] = garray[B->j[j]];
1381     }
1382   }
1383   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1384 
1385   /* store the column indices to the file */
1386    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1387   if (!rank) {
1388     MPI_Status status;
1389     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1390     for (i=1; i<size; i++) {
1391       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1392       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1393       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1394       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1395       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1396     }
1397      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1398   } else {
1399     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1400     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1401     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1402     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1403   }
1404   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1405 
1406   /* load up the local column values */
1407   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1408   cnt  = 0;
1409   for (i=0; i<mat->rmap->n; i++) {
1410     for (j=B->i[i]; j<B->i[i+1]; j++) {
1411       if ( garray[B->j[j]] > cstart) break;
1412       column_values[cnt++] = B->a[j];
1413     }
1414     for (k=A->i[i]; k<A->i[i+1]; k++) {
1415       column_values[cnt++] = A->a[k];
1416     }
1417     for (; j<B->i[i+1]; j++) {
1418       column_values[cnt++] = B->a[j];
1419     }
1420   }
1421   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1422 
1423   /* store the column values to the file */
1424    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1425   if (!rank) {
1426     MPI_Status status;
1427     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1428     for (i=1; i<size; i++) {
1429        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1430       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1431       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1432       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1433       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1434     }
1435     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1436   } else {
1437     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1438     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1439     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1440     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1441   }
1442   ierr = PetscFree(column_values);CHKERRQ(ierr);
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1448 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1449 {
1450   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1451   PetscErrorCode    ierr;
1452   PetscMPIInt       rank = aij->rank,size = aij->size;
1453   PetscBool         isdraw,iascii,isbinary;
1454   PetscViewer       sviewer;
1455   PetscViewerFormat format;
1456 
1457   PetscFunctionBegin;
1458   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1459   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1460   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1461   if (iascii) {
1462     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1463     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1464       MatInfo    info;
1465       PetscBool  inodes;
1466 
1467       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1468       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1469       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1470       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1471       if (!inodes) {
1472         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1473 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1474       } else {
1475         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1476 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1477       }
1478       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1480       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1481       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1482       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1483       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1484       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1485       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1486       PetscFunctionReturn(0);
1487     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1488       PetscInt   inodecount,inodelimit,*inodes;
1489       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1490       if (inodes) {
1491         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1492       } else {
1493         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1494       }
1495       PetscFunctionReturn(0);
1496     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1497       PetscFunctionReturn(0);
1498     }
1499   } else if (isbinary) {
1500     if (size == 1) {
1501       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1502       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1503     } else {
1504       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1505     }
1506     PetscFunctionReturn(0);
1507   } else if (isdraw) {
1508     PetscDraw  draw;
1509     PetscBool  isnull;
1510     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1511     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1512   }
1513 
1514   if (size == 1) {
1515     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1516     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1517   } else {
1518     /* assemble the entire matrix onto first processor. */
1519     Mat         A;
1520     Mat_SeqAIJ  *Aloc;
1521     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1522     MatScalar   *a;
1523 
1524     if (mat->rmap->N > 1024) {
1525       PetscBool  flg = PETSC_FALSE;
1526 
1527       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1528       if (!flg) {
1529         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1530       }
1531     }
1532 
1533     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1534     if (!rank) {
1535       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1536     } else {
1537       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1538     }
1539     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1540     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1541     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1542     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1543     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1544 
1545     /* copy over the A part */
1546     Aloc = (Mat_SeqAIJ*)aij->A->data;
1547     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1548     row = mat->rmap->rstart;
1549     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1550     for (i=0; i<m; i++) {
1551       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1552       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1553     }
1554     aj = Aloc->j;
1555     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1556 
1557     /* copy over the B part */
1558     Aloc = (Mat_SeqAIJ*)aij->B->data;
1559     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1560     row  = mat->rmap->rstart;
1561     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1562     ct   = cols;
1563     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1564     for (i=0; i<m; i++) {
1565       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1566       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1567     }
1568     ierr = PetscFree(ct);CHKERRQ(ierr);
1569     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1570     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1571     /*
1572        Everyone has to call to draw the matrix since the graphics waits are
1573        synchronized across all processors that share the PetscDraw object
1574     */
1575     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1576     if (!rank) {
1577       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1578       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1579       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1580       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1581     }
1582     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1583     ierr = MatDestroy(&A);CHKERRQ(ierr);
1584   }
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatView_MPIAIJ"
1590 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1591 {
1592   PetscErrorCode ierr;
1593   PetscBool      iascii,isdraw,issocket,isbinary;
1594 
1595   PetscFunctionBegin;
1596   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1597   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1598   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1599   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1600   if (iascii || isdraw || isbinary || issocket) {
1601     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1602   } else {
1603     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1604   }
1605   PetscFunctionReturn(0);
1606 }
1607 
1608 #undef __FUNCT__
1609 #define __FUNCT__ "MatSOR_MPIAIJ"
1610 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1611 {
1612   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1613   PetscErrorCode ierr;
1614   Vec            bb1 = 0;
1615   PetscBool      hasop;
1616 
1617   PetscFunctionBegin;
1618   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1619     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1620   }
1621 
1622   if (flag == SOR_APPLY_UPPER) {
1623     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624     PetscFunctionReturn(0);
1625   }
1626 
1627   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1628     if (flag & SOR_ZERO_INITIAL_GUESS) {
1629       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1630       its--;
1631     }
1632 
1633     while (its--) {
1634       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1635       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1636 
1637       /* update rhs: bb1 = bb - B*x */
1638       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1639       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1640 
1641       /* local sweep */
1642       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1643     }
1644   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1645     if (flag & SOR_ZERO_INITIAL_GUESS) {
1646       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1647       its--;
1648     }
1649     while (its--) {
1650       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1651       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1652 
1653       /* update rhs: bb1 = bb - B*x */
1654       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1655       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1656 
1657       /* local sweep */
1658       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1659     }
1660   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1661     if (flag & SOR_ZERO_INITIAL_GUESS) {
1662       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1663       its--;
1664     }
1665     while (its--) {
1666       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1667       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1668 
1669       /* update rhs: bb1 = bb - B*x */
1670       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1671       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1672 
1673       /* local sweep */
1674       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1675     }
1676   }  else if (flag & SOR_EISENSTAT) {
1677     Vec         xx1;
1678 
1679     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1680     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1681 
1682     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1683     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1684     if (!mat->diag) {
1685       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1686       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1687     }
1688     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1689     if (hasop) {
1690       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1691     } else {
1692       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1693     }
1694     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1695 
1696     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1697 
1698     /* local sweep */
1699     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1700     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1701     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1702   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1703 
1704   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatPermute_MPIAIJ"
1710 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1711 {
1712   MPI_Comm       comm;
1713   PetscInt       first,local_rowsize,local_colsize;
1714   const PetscInt *rows;
1715   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1716   PetscErrorCode ierr;
1717 
1718   PetscFunctionBegin;
1719   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1720   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1721   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1722   /* collect the global row permutation and invert it */
1723   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1724   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1725   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1726   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1727   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1728   /* get the local target indices */
1729   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1730   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1731   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1732   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1733   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1734   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1735   /* the column permutation is so much easier;
1736      make a local version of 'colp' and invert it */
1737   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1738   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1739   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1740   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1741   /* now we just get the submatrix */
1742   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1743   /* clean up */
1744   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1745   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1746   PetscFunctionReturn(0);
1747 }
1748 
1749 #undef __FUNCT__
1750 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1751 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1752 {
1753   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1754   Mat            A = mat->A,B = mat->B;
1755   PetscErrorCode ierr;
1756   PetscReal      isend[5],irecv[5];
1757 
1758   PetscFunctionBegin;
1759   info->block_size     = 1.0;
1760   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1761   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1762   isend[3] = info->memory;  isend[4] = info->mallocs;
1763   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1764   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1765   isend[3] += info->memory;  isend[4] += info->mallocs;
1766   if (flag == MAT_LOCAL) {
1767     info->nz_used      = isend[0];
1768     info->nz_allocated = isend[1];
1769     info->nz_unneeded  = isend[2];
1770     info->memory       = isend[3];
1771     info->mallocs      = isend[4];
1772   } else if (flag == MAT_GLOBAL_MAX) {
1773     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1774     info->nz_used      = irecv[0];
1775     info->nz_allocated = irecv[1];
1776     info->nz_unneeded  = irecv[2];
1777     info->memory       = irecv[3];
1778     info->mallocs      = irecv[4];
1779   } else if (flag == MAT_GLOBAL_SUM) {
1780     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1781     info->nz_used      = irecv[0];
1782     info->nz_allocated = irecv[1];
1783     info->nz_unneeded  = irecv[2];
1784     info->memory       = irecv[3];
1785     info->mallocs      = irecv[4];
1786   }
1787   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1788   info->fill_ratio_needed = 0;
1789   info->factor_mallocs    = 0;
1790 
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 #undef __FUNCT__
1795 #define __FUNCT__ "MatSetOption_MPIAIJ"
1796 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1797 {
1798   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1799   PetscErrorCode ierr;
1800 
1801   PetscFunctionBegin;
1802   switch (op) {
1803   case MAT_NEW_NONZERO_LOCATIONS:
1804   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1805   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1806   case MAT_KEEP_NONZERO_PATTERN:
1807   case MAT_NEW_NONZERO_LOCATION_ERR:
1808   case MAT_USE_INODES:
1809   case MAT_IGNORE_ZERO_ENTRIES:
1810     MatCheckPreallocated(A,1);
1811     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1812     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1813     break;
1814   case MAT_ROW_ORIENTED:
1815     a->roworiented = flg;
1816     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1817     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1818     break;
1819   case MAT_NEW_DIAGONALS:
1820     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1821     break;
1822   case MAT_IGNORE_OFF_PROC_ENTRIES:
1823     a->donotstash = flg;
1824     break;
1825   case MAT_SPD:
1826     A->spd_set                         = PETSC_TRUE;
1827     A->spd                             = flg;
1828     if (flg) {
1829       A->symmetric                     = PETSC_TRUE;
1830       A->structurally_symmetric        = PETSC_TRUE;
1831       A->symmetric_set                 = PETSC_TRUE;
1832       A->structurally_symmetric_set    = PETSC_TRUE;
1833     }
1834     break;
1835   case MAT_SYMMETRIC:
1836     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1837     break;
1838   case MAT_STRUCTURALLY_SYMMETRIC:
1839     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1840     break;
1841   case MAT_HERMITIAN:
1842     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1843     break;
1844   case MAT_SYMMETRY_ETERNAL:
1845     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1846     break;
1847   default:
1848     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1849   }
1850   PetscFunctionReturn(0);
1851 }
1852 
1853 #undef __FUNCT__
1854 #define __FUNCT__ "MatGetRow_MPIAIJ"
1855 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1856 {
1857   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1858   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1859   PetscErrorCode ierr;
1860   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1861   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1862   PetscInt       *cmap,*idx_p;
1863 
1864   PetscFunctionBegin;
1865   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1866   mat->getrowactive = PETSC_TRUE;
1867 
1868   if (!mat->rowvalues && (idx || v)) {
1869     /*
1870         allocate enough space to hold information from the longest row.
1871     */
1872     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1873     PetscInt   max = 1,tmp;
1874     for (i=0; i<matin->rmap->n; i++) {
1875       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1876       if (max < tmp) { max = tmp; }
1877     }
1878     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1879   }
1880 
1881   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1882   lrow = row - rstart;
1883 
1884   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1885   if (!v)   {pvA = 0; pvB = 0;}
1886   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1887   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1888   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1889   nztot = nzA + nzB;
1890 
1891   cmap  = mat->garray;
1892   if (v  || idx) {
1893     if (nztot) {
1894       /* Sort by increasing column numbers, assuming A and B already sorted */
1895       PetscInt imark = -1;
1896       if (v) {
1897         *v = v_p = mat->rowvalues;
1898         for (i=0; i<nzB; i++) {
1899           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1900           else break;
1901         }
1902         imark = i;
1903         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1904         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1905       }
1906       if (idx) {
1907         *idx = idx_p = mat->rowindices;
1908         if (imark > -1) {
1909           for (i=0; i<imark; i++) {
1910             idx_p[i] = cmap[cworkB[i]];
1911           }
1912         } else {
1913           for (i=0; i<nzB; i++) {
1914             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1915             else break;
1916           }
1917           imark = i;
1918         }
1919         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1920         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1921       }
1922     } else {
1923       if (idx) *idx = 0;
1924       if (v)   *v   = 0;
1925     }
1926   }
1927   *nz = nztot;
1928   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1929   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1930   PetscFunctionReturn(0);
1931 }
1932 
1933 #undef __FUNCT__
1934 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1935 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1936 {
1937   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1938 
1939   PetscFunctionBegin;
1940   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1941   aij->getrowactive = PETSC_FALSE;
1942   PetscFunctionReturn(0);
1943 }
1944 
1945 #undef __FUNCT__
1946 #define __FUNCT__ "MatNorm_MPIAIJ"
1947 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1948 {
1949   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1950   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1951   PetscErrorCode ierr;
1952   PetscInt       i,j,cstart = mat->cmap->rstart;
1953   PetscReal      sum = 0.0;
1954   MatScalar      *v;
1955 
1956   PetscFunctionBegin;
1957   if (aij->size == 1) {
1958     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1959   } else {
1960     if (type == NORM_FROBENIUS) {
1961       v = amat->a;
1962       for (i=0; i<amat->nz; i++) {
1963 #if defined(PETSC_USE_COMPLEX)
1964         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1965 #else
1966         sum += (*v)*(*v); v++;
1967 #endif
1968       }
1969       v = bmat->a;
1970       for (i=0; i<bmat->nz; i++) {
1971 #if defined(PETSC_USE_COMPLEX)
1972         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1973 #else
1974         sum += (*v)*(*v); v++;
1975 #endif
1976       }
1977       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1978       *norm = PetscSqrtReal(*norm);
1979     } else if (type == NORM_1) { /* max column norm */
1980       PetscReal *tmp,*tmp2;
1981       PetscInt  *jj,*garray = aij->garray;
1982       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1983       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1984       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1985       *norm = 0.0;
1986       v = amat->a; jj = amat->j;
1987       for (j=0; j<amat->nz; j++) {
1988         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1989       }
1990       v = bmat->a; jj = bmat->j;
1991       for (j=0; j<bmat->nz; j++) {
1992         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1993       }
1994       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1995       for (j=0; j<mat->cmap->N; j++) {
1996         if (tmp2[j] > *norm) *norm = tmp2[j];
1997       }
1998       ierr = PetscFree(tmp);CHKERRQ(ierr);
1999       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2000     } else if (type == NORM_INFINITY) { /* max row norm */
2001       PetscReal ntemp = 0.0;
2002       for (j=0; j<aij->A->rmap->n; j++) {
2003         v = amat->a + amat->i[j];
2004         sum = 0.0;
2005         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2006           sum += PetscAbsScalar(*v); v++;
2007         }
2008         v = bmat->a + bmat->i[j];
2009         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2010           sum += PetscAbsScalar(*v); v++;
2011         }
2012         if (sum > ntemp) ntemp = sum;
2013       }
2014       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2015     } else {
2016       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2017     }
2018   }
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 #undef __FUNCT__
2023 #define __FUNCT__ "MatTranspose_MPIAIJ"
2024 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2025 {
2026   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2027   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2028   PetscErrorCode ierr;
2029   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2030   PetscInt       cstart=A->cmap->rstart,ncol;
2031   Mat            B;
2032   MatScalar      *array;
2033 
2034   PetscFunctionBegin;
2035   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2036 
2037   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2038   ai = Aloc->i; aj = Aloc->j;
2039   bi = Bloc->i; bj = Bloc->j;
2040   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2041     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2042     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2043     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2044     for (i=0; i<ai[ma]; i++){
2045       d_nnz[aj[i]] ++;
2046       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2047     }
2048 
2049     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2050     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2051     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2052     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2053     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2054     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2055     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2056   } else {
2057     B = *matout;
2058     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2059     for (i=0; i<ai[ma]; i++){
2060       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2061     }
2062   }
2063 
2064   /* copy over the A part */
2065   array = Aloc->a;
2066   row = A->rmap->rstart;
2067   for (i=0; i<ma; i++) {
2068     ncol = ai[i+1]-ai[i];
2069     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2070     row++; array += ncol; aj += ncol;
2071   }
2072   aj = Aloc->j;
2073   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2074 
2075   /* copy over the B part */
2076   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2077   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2078   array = Bloc->a;
2079   row = A->rmap->rstart;
2080   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2081   cols_tmp = cols;
2082   for (i=0; i<mb; i++) {
2083     ncol = bi[i+1]-bi[i];
2084     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2085     row++; array += ncol; cols_tmp += ncol;
2086   }
2087   ierr = PetscFree(cols);CHKERRQ(ierr);
2088 
2089   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2090   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2091   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2092     *matout = B;
2093   } else {
2094     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2095   }
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 #undef __FUNCT__
2100 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2101 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2102 {
2103   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2104   Mat            a = aij->A,b = aij->B;
2105   PetscErrorCode ierr;
2106   PetscInt       s1,s2,s3;
2107 
2108   PetscFunctionBegin;
2109   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2110   if (rr) {
2111     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2112     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2113     /* Overlap communication with computation. */
2114     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2115   }
2116   if (ll) {
2117     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2118     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2119     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2120   }
2121   /* scale  the diagonal block */
2122   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2123 
2124   if (rr) {
2125     /* Do a scatter end and then right scale the off-diagonal block */
2126     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2127     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2128   }
2129 
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 #undef __FUNCT__
2134 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2135 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2136 {
2137   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2138   PetscErrorCode ierr;
2139 
2140   PetscFunctionBegin;
2141   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 #undef __FUNCT__
2146 #define __FUNCT__ "MatEqual_MPIAIJ"
2147 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2148 {
2149   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2150   Mat            a,b,c,d;
2151   PetscBool      flg;
2152   PetscErrorCode ierr;
2153 
2154   PetscFunctionBegin;
2155   a = matA->A; b = matA->B;
2156   c = matB->A; d = matB->B;
2157 
2158   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2159   if (flg) {
2160     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2161   }
2162   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 #undef __FUNCT__
2167 #define __FUNCT__ "MatCopy_MPIAIJ"
2168 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2169 {
2170   PetscErrorCode ierr;
2171   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2172   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2173 
2174   PetscFunctionBegin;
2175   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2176   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2177     /* because of the column compression in the off-processor part of the matrix a->B,
2178        the number of columns in a->B and b->B may be different, hence we cannot call
2179        the MatCopy() directly on the two parts. If need be, we can provide a more
2180        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2181        then copying the submatrices */
2182     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2183   } else {
2184     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2185     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2186   }
2187   PetscFunctionReturn(0);
2188 }
2189 
2190 #undef __FUNCT__
2191 #define __FUNCT__ "MatSetUp_MPIAIJ"
2192 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2193 {
2194   PetscErrorCode ierr;
2195 
2196   PetscFunctionBegin;
2197   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2198   PetscFunctionReturn(0);
2199 }
2200 
2201 #undef __FUNCT__
2202 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2203 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2204 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2205 {
2206   PetscInt          i,m=Y->rmap->N;
2207   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2208   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2209   const PetscInt    *xi = x->i,*yi = y->i;
2210 
2211   PetscFunctionBegin;
2212   /* Set the number of nonzeros in the new matrix */
2213   for(i=0; i<m; i++) {
2214     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2215     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2216     nnz[i] = 0;
2217     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2218       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2219       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2220       nnz[i]++;
2221     }
2222     for (; k<nzy; k++) nnz[i]++;
2223   }
2224   PetscFunctionReturn(0);
2225 }
2226 
2227 #undef __FUNCT__
2228 #define __FUNCT__ "MatAXPY_MPIAIJ"
2229 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2230 {
2231   PetscErrorCode ierr;
2232   PetscInt       i;
2233   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2234   PetscBLASInt   bnz,one=1;
2235   Mat_SeqAIJ     *x,*y;
2236 
2237   PetscFunctionBegin;
2238   if (str == SAME_NONZERO_PATTERN) {
2239     PetscScalar alpha = a;
2240     x = (Mat_SeqAIJ *)xx->A->data;
2241     y = (Mat_SeqAIJ *)yy->A->data;
2242     bnz = PetscBLASIntCast(x->nz);
2243     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2244     x = (Mat_SeqAIJ *)xx->B->data;
2245     y = (Mat_SeqAIJ *)yy->B->data;
2246     bnz = PetscBLASIntCast(x->nz);
2247     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2248   } else if (str == SUBSET_NONZERO_PATTERN) {
2249     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2250 
2251     x = (Mat_SeqAIJ *)xx->B->data;
2252     y = (Mat_SeqAIJ *)yy->B->data;
2253     if (y->xtoy && y->XtoY != xx->B) {
2254       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2255       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2256     }
2257     if (!y->xtoy) { /* get xtoy */
2258       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2259       y->XtoY = xx->B;
2260       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2261     }
2262     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2263   } else {
2264     Mat B;
2265     PetscInt *nnz_d,*nnz_o;
2266     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2267     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2268     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2269     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2270     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2271     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2272     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2273     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2274     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2275     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2276     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2277     ierr = MatHeaderReplace(Y,B);
2278     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2279     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2280   }
2281   PetscFunctionReturn(0);
2282 }
2283 
2284 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2285 
2286 #undef __FUNCT__
2287 #define __FUNCT__ "MatConjugate_MPIAIJ"
2288 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2289 {
2290 #if defined(PETSC_USE_COMPLEX)
2291   PetscErrorCode ierr;
2292   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2293 
2294   PetscFunctionBegin;
2295   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2296   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2297 #else
2298   PetscFunctionBegin;
2299 #endif
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatRealPart_MPIAIJ"
2305 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2306 {
2307   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2308   PetscErrorCode ierr;
2309 
2310   PetscFunctionBegin;
2311   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2312   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2313   PetscFunctionReturn(0);
2314 }
2315 
2316 #undef __FUNCT__
2317 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2318 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2319 {
2320   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2321   PetscErrorCode ierr;
2322 
2323   PetscFunctionBegin;
2324   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2325   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2326   PetscFunctionReturn(0);
2327 }
2328 
2329 #ifdef PETSC_HAVE_PBGL
2330 
2331 #include <boost/parallel/mpi/bsp_process_group.hpp>
2332 #include <boost/graph/distributed/ilu_default_graph.hpp>
2333 #include <boost/graph/distributed/ilu_0_block.hpp>
2334 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2335 #include <boost/graph/distributed/petsc/interface.hpp>
2336 #include <boost/multi_array.hpp>
2337 #include <boost/parallel/distributed_property_map->hpp>
2338 
2339 #undef __FUNCT__
2340 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2341 /*
2342   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2343 */
2344 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2345 {
2346   namespace petsc = boost::distributed::petsc;
2347 
2348   namespace graph_dist = boost::graph::distributed;
2349   using boost::graph::distributed::ilu_default::process_group_type;
2350   using boost::graph::ilu_permuted;
2351 
2352   PetscBool       row_identity, col_identity;
2353   PetscContainer  c;
2354   PetscInt        m, n, M, N;
2355   PetscErrorCode  ierr;
2356 
2357   PetscFunctionBegin;
2358   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2359   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2360   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2361   if (!row_identity || !col_identity) {
2362     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2363   }
2364 
2365   process_group_type pg;
2366   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2367   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2368   lgraph_type&   level_graph = *lgraph_p;
2369   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2370 
2371   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2372   ilu_permuted(level_graph);
2373 
2374   /* put together the new matrix */
2375   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2376   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2377   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2378   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2379   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2380   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2381   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2382   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2383 
2384   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
2385   ierr = PetscContainerSetPointer(c, lgraph_p);
2386   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2387   ierr = PetscContainerDestroy(&c);
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2393 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2394 {
2395   PetscFunctionBegin;
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 #undef __FUNCT__
2400 #define __FUNCT__ "MatSolve_MPIAIJ"
2401 /*
2402   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2403 */
2404 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2405 {
2406   namespace graph_dist = boost::graph::distributed;
2407 
2408   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2409   lgraph_type*   lgraph_p;
2410   PetscContainer c;
2411   PetscErrorCode ierr;
2412 
2413   PetscFunctionBegin;
2414   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2415   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2416   ierr = VecCopy(b, x);CHKERRQ(ierr);
2417 
2418   PetscScalar* array_x;
2419   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2420   PetscInt sx;
2421   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2422 
2423   PetscScalar* array_b;
2424   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2425   PetscInt sb;
2426   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2427 
2428   lgraph_type&   level_graph = *lgraph_p;
2429   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2430 
2431   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2432   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2433                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2434 
2435   typedef boost::iterator_property_map<array_ref_type::iterator,
2436                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2437   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2438                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2439 
2440   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2441 
2442   PetscFunctionReturn(0);
2443 }
2444 #endif
2445 
2446 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2447   PetscInt       nzlocal,nsends,nrecvs;
2448   PetscMPIInt    *send_rank,*recv_rank;
2449   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2450   PetscScalar    *sbuf_a,**rbuf_a;
2451   PetscErrorCode (*Destroy)(Mat);
2452 } Mat_Redundant;
2453 
2454 #undef __FUNCT__
2455 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2456 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2457 {
2458   PetscErrorCode       ierr;
2459   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2460   PetscInt             i;
2461 
2462   PetscFunctionBegin;
2463   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2464   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2465   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2466   for (i=0; i<redund->nrecvs; i++){
2467     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2468     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2469   }
2470   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2471   ierr = PetscFree(redund);CHKERRQ(ierr);
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 #undef __FUNCT__
2476 #define __FUNCT__ "MatDestroy_MatRedundant"
2477 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2478 {
2479   PetscErrorCode  ierr;
2480   PetscContainer  container;
2481   Mat_Redundant   *redund=PETSC_NULL;
2482 
2483   PetscFunctionBegin;
2484   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2485   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2486   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2487   A->ops->destroy = redund->Destroy;
2488   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2489   if (A->ops->destroy) {
2490     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2491   }
2492   PetscFunctionReturn(0);
2493 }
2494 
2495 #undef __FUNCT__
2496 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2497 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2498 {
2499   PetscMPIInt    rank,size;
2500   MPI_Comm       comm=((PetscObject)mat)->comm;
2501   PetscErrorCode ierr;
2502   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2503   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2504   PetscInt       *rowrange=mat->rmap->range;
2505   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2506   Mat            A=aij->A,B=aij->B,C=*matredundant;
2507   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2508   PetscScalar    *sbuf_a;
2509   PetscInt       nzlocal=a->nz+b->nz;
2510   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2511   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2512   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2513   MatScalar      *aworkA,*aworkB;
2514   PetscScalar    *vals;
2515   PetscMPIInt    tag1,tag2,tag3,imdex;
2516   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2517                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2518   MPI_Status     recv_status,*send_status;
2519   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2520   PetscInt       **rbuf_j=PETSC_NULL;
2521   PetscScalar    **rbuf_a=PETSC_NULL;
2522   Mat_Redundant  *redund=PETSC_NULL;
2523   PetscContainer container;
2524 
2525   PetscFunctionBegin;
2526   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2527   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2528 
2529   if (reuse == MAT_REUSE_MATRIX) {
2530     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2531     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2532     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2533     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2534     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2535     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2536     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2537     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2538 
2539     nsends    = redund->nsends;
2540     nrecvs    = redund->nrecvs;
2541     send_rank = redund->send_rank;
2542     recv_rank = redund->recv_rank;
2543     sbuf_nz   = redund->sbuf_nz;
2544     rbuf_nz   = redund->rbuf_nz;
2545     sbuf_j    = redund->sbuf_j;
2546     sbuf_a    = redund->sbuf_a;
2547     rbuf_j    = redund->rbuf_j;
2548     rbuf_a    = redund->rbuf_a;
2549   }
2550 
2551   if (reuse == MAT_INITIAL_MATRIX){
2552     PetscMPIInt  subrank,subsize;
2553     PetscInt     nleftover,np_subcomm;
2554     /* get the destination processors' id send_rank, nsends and nrecvs */
2555     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2556     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2557     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2558     np_subcomm = size/nsubcomm;
2559     nleftover  = size - nsubcomm*np_subcomm;
2560     nsends = 0; nrecvs = 0;
2561     for (i=0; i<size; i++){ /* i=rank*/
2562       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2563         send_rank[nsends] = i; nsends++;
2564         recv_rank[nrecvs++] = i;
2565       }
2566     }
2567     if (rank >= size - nleftover){/* this proc is a leftover processor */
2568       i = size-nleftover-1;
2569       j = 0;
2570       while (j < nsubcomm - nleftover){
2571         send_rank[nsends++] = i;
2572         i--; j++;
2573       }
2574     }
2575 
2576     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2577       for (i=0; i<nleftover; i++){
2578         recv_rank[nrecvs++] = size-nleftover+i;
2579       }
2580     }
2581 
2582     /* allocate sbuf_j, sbuf_a */
2583     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2584     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2585     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2586   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2587 
2588   /* copy mat's local entries into the buffers */
2589   if (reuse == MAT_INITIAL_MATRIX){
2590     rownz_max = 0;
2591     rptr = sbuf_j;
2592     cols = sbuf_j + rend-rstart + 1;
2593     vals = sbuf_a;
2594     rptr[0] = 0;
2595     for (i=0; i<rend-rstart; i++){
2596       row = i + rstart;
2597       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2598       ncols  = nzA + nzB;
2599       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2600       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2601       /* load the column indices for this row into cols */
2602       lwrite = 0;
2603       for (l=0; l<nzB; l++) {
2604         if ((ctmp = bmap[cworkB[l]]) < cstart){
2605           vals[lwrite]   = aworkB[l];
2606           cols[lwrite++] = ctmp;
2607         }
2608       }
2609       for (l=0; l<nzA; l++){
2610         vals[lwrite]   = aworkA[l];
2611         cols[lwrite++] = cstart + cworkA[l];
2612       }
2613       for (l=0; l<nzB; l++) {
2614         if ((ctmp = bmap[cworkB[l]]) >= cend){
2615           vals[lwrite]   = aworkB[l];
2616           cols[lwrite++] = ctmp;
2617         }
2618       }
2619       vals += ncols;
2620       cols += ncols;
2621       rptr[i+1] = rptr[i] + ncols;
2622       if (rownz_max < ncols) rownz_max = ncols;
2623     }
2624     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2625   } else { /* only copy matrix values into sbuf_a */
2626     rptr = sbuf_j;
2627     vals = sbuf_a;
2628     rptr[0] = 0;
2629     for (i=0; i<rend-rstart; i++){
2630       row = i + rstart;
2631       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2632       ncols  = nzA + nzB;
2633       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2634       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2635       lwrite = 0;
2636       for (l=0; l<nzB; l++) {
2637         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2638       }
2639       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2640       for (l=0; l<nzB; l++) {
2641         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2642       }
2643       vals += ncols;
2644       rptr[i+1] = rptr[i] + ncols;
2645     }
2646   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2647 
2648   /* send nzlocal to others, and recv other's nzlocal */
2649   /*--------------------------------------------------*/
2650   if (reuse == MAT_INITIAL_MATRIX){
2651     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2652     s_waits2 = s_waits3 + nsends;
2653     s_waits1 = s_waits2 + nsends;
2654     r_waits1 = s_waits1 + nsends;
2655     r_waits2 = r_waits1 + nrecvs;
2656     r_waits3 = r_waits2 + nrecvs;
2657   } else {
2658     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2659     r_waits3 = s_waits3 + nsends;
2660   }
2661 
2662   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2663   if (reuse == MAT_INITIAL_MATRIX){
2664     /* get new tags to keep the communication clean */
2665     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2666     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2667     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2668 
2669     /* post receives of other's nzlocal */
2670     for (i=0; i<nrecvs; i++){
2671       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2672     }
2673     /* send nzlocal to others */
2674     for (i=0; i<nsends; i++){
2675       sbuf_nz[i] = nzlocal;
2676       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2677     }
2678     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2679     count = nrecvs;
2680     while (count) {
2681       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2682       recv_rank[imdex] = recv_status.MPI_SOURCE;
2683       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2684       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2685 
2686       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2687       rbuf_nz[imdex] += i + 2;
2688       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2689       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2690       count--;
2691     }
2692     /* wait on sends of nzlocal */
2693     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2694     /* send mat->i,j to others, and recv from other's */
2695     /*------------------------------------------------*/
2696     for (i=0; i<nsends; i++){
2697       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2698       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2699     }
2700     /* wait on receives of mat->i,j */
2701     /*------------------------------*/
2702     count = nrecvs;
2703     while (count) {
2704       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2705       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2706       count--;
2707     }
2708     /* wait on sends of mat->i,j */
2709     /*---------------------------*/
2710     if (nsends) {
2711       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2712     }
2713   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2714 
2715   /* post receives, send and receive mat->a */
2716   /*----------------------------------------*/
2717   for (imdex=0; imdex<nrecvs; imdex++) {
2718     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2719   }
2720   for (i=0; i<nsends; i++){
2721     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2722   }
2723   count = nrecvs;
2724   while (count) {
2725     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2726     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2727     count--;
2728   }
2729   if (nsends) {
2730     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2731   }
2732 
2733   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2734 
2735   /* create redundant matrix */
2736   /*-------------------------*/
2737   if (reuse == MAT_INITIAL_MATRIX){
2738     /* compute rownz_max for preallocation */
2739     for (imdex=0; imdex<nrecvs; imdex++){
2740       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2741       rptr = rbuf_j[imdex];
2742       for (i=0; i<j; i++){
2743         ncols = rptr[i+1] - rptr[i];
2744         if (rownz_max < ncols) rownz_max = ncols;
2745       }
2746     }
2747 
2748     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2749     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2750     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2751     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2752     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2753     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2754   } else {
2755     C = *matredundant;
2756   }
2757 
2758   /* insert local matrix entries */
2759   rptr = sbuf_j;
2760   cols = sbuf_j + rend-rstart + 1;
2761   vals = sbuf_a;
2762   for (i=0; i<rend-rstart; i++){
2763     row   = i + rstart;
2764     ncols = rptr[i+1] - rptr[i];
2765     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2766     vals += ncols;
2767     cols += ncols;
2768   }
2769   /* insert received matrix entries */
2770   for (imdex=0; imdex<nrecvs; imdex++){
2771     rstart = rowrange[recv_rank[imdex]];
2772     rend   = rowrange[recv_rank[imdex]+1];
2773     rptr = rbuf_j[imdex];
2774     cols = rbuf_j[imdex] + rend-rstart + 1;
2775     vals = rbuf_a[imdex];
2776     for (i=0; i<rend-rstart; i++){
2777       row   = i + rstart;
2778       ncols = rptr[i+1] - rptr[i];
2779       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2780       vals += ncols;
2781       cols += ncols;
2782     }
2783   }
2784   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2785   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2786   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2787   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2788   if (reuse == MAT_INITIAL_MATRIX) {
2789     PetscContainer container;
2790     *matredundant = C;
2791     /* create a supporting struct and attach it to C for reuse */
2792     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2793     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2794     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2795     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2796     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2797     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2798 
2799     redund->nzlocal = nzlocal;
2800     redund->nsends  = nsends;
2801     redund->nrecvs  = nrecvs;
2802     redund->send_rank = send_rank;
2803     redund->recv_rank = recv_rank;
2804     redund->sbuf_nz = sbuf_nz;
2805     redund->rbuf_nz = rbuf_nz;
2806     redund->sbuf_j  = sbuf_j;
2807     redund->sbuf_a  = sbuf_a;
2808     redund->rbuf_j  = rbuf_j;
2809     redund->rbuf_a  = rbuf_a;
2810 
2811     redund->Destroy = C->ops->destroy;
2812     C->ops->destroy = MatDestroy_MatRedundant;
2813   }
2814   PetscFunctionReturn(0);
2815 }
2816 
2817 #undef __FUNCT__
2818 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2819 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2820 {
2821   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2822   PetscErrorCode ierr;
2823   PetscInt       i,*idxb = 0;
2824   PetscScalar    *va,*vb;
2825   Vec            vtmp;
2826 
2827   PetscFunctionBegin;
2828   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2829   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2830   if (idx) {
2831     for (i=0; i<A->rmap->n; i++) {
2832       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2833     }
2834   }
2835 
2836   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2837   if (idx) {
2838     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2839   }
2840   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2841   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2842 
2843   for (i=0; i<A->rmap->n; i++){
2844     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2845       va[i] = vb[i];
2846       if (idx) idx[i] = a->garray[idxb[i]];
2847     }
2848   }
2849 
2850   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2851   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2852   ierr = PetscFree(idxb);CHKERRQ(ierr);
2853   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2854   PetscFunctionReturn(0);
2855 }
2856 
2857 #undef __FUNCT__
2858 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2859 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2860 {
2861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2862   PetscErrorCode ierr;
2863   PetscInt       i,*idxb = 0;
2864   PetscScalar    *va,*vb;
2865   Vec            vtmp;
2866 
2867   PetscFunctionBegin;
2868   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2869   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2870   if (idx) {
2871     for (i=0; i<A->cmap->n; i++) {
2872       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2873     }
2874   }
2875 
2876   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2877   if (idx) {
2878     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2879   }
2880   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2881   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2882 
2883   for (i=0; i<A->rmap->n; i++){
2884     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2885       va[i] = vb[i];
2886       if (idx) idx[i] = a->garray[idxb[i]];
2887     }
2888   }
2889 
2890   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2891   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2892   ierr = PetscFree(idxb);CHKERRQ(ierr);
2893   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2894   PetscFunctionReturn(0);
2895 }
2896 
2897 #undef __FUNCT__
2898 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2899 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2900 {
2901   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2902   PetscInt       n      = A->rmap->n;
2903   PetscInt       cstart = A->cmap->rstart;
2904   PetscInt      *cmap   = mat->garray;
2905   PetscInt      *diagIdx, *offdiagIdx;
2906   Vec            diagV, offdiagV;
2907   PetscScalar   *a, *diagA, *offdiagA;
2908   PetscInt       r;
2909   PetscErrorCode ierr;
2910 
2911   PetscFunctionBegin;
2912   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2913   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2914   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2915   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2916   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2917   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2918   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2919   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2920   for(r = 0; r < n; ++r) {
2921     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2922       a[r]   = diagA[r];
2923       idx[r] = cstart + diagIdx[r];
2924     } else {
2925       a[r]   = offdiagA[r];
2926       idx[r] = cmap[offdiagIdx[r]];
2927     }
2928   }
2929   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2930   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2931   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2932   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2933   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2934   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2935   PetscFunctionReturn(0);
2936 }
2937 
2938 #undef __FUNCT__
2939 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2940 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2941 {
2942   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2943   PetscInt       n      = A->rmap->n;
2944   PetscInt       cstart = A->cmap->rstart;
2945   PetscInt      *cmap   = mat->garray;
2946   PetscInt      *diagIdx, *offdiagIdx;
2947   Vec            diagV, offdiagV;
2948   PetscScalar   *a, *diagA, *offdiagA;
2949   PetscInt       r;
2950   PetscErrorCode ierr;
2951 
2952   PetscFunctionBegin;
2953   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2954   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2955   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2956   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2957   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2958   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2959   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2960   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2961   for(r = 0; r < n; ++r) {
2962     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2963       a[r]   = diagA[r];
2964       idx[r] = cstart + diagIdx[r];
2965     } else {
2966       a[r]   = offdiagA[r];
2967       idx[r] = cmap[offdiagIdx[r]];
2968     }
2969   }
2970   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2971   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2972   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2973   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2974   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2975   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2976   PetscFunctionReturn(0);
2977 }
2978 
2979 #undef __FUNCT__
2980 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2981 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2982 {
2983   PetscErrorCode ierr;
2984   Mat            *dummy;
2985 
2986   PetscFunctionBegin;
2987   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2988   *newmat = *dummy;
2989   ierr = PetscFree(dummy);CHKERRQ(ierr);
2990   PetscFunctionReturn(0);
2991 }
2992 
2993 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2994 
2995 #undef __FUNCT__
2996 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2997 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2998 {
2999   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3000   PetscErrorCode ierr;
3001 
3002   PetscFunctionBegin;
3003   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3004   PetscFunctionReturn(0);
3005 }
3006 
3007 
3008 /* -------------------------------------------------------------------*/
3009 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3010        MatGetRow_MPIAIJ,
3011        MatRestoreRow_MPIAIJ,
3012        MatMult_MPIAIJ,
3013 /* 4*/ MatMultAdd_MPIAIJ,
3014        MatMultTranspose_MPIAIJ,
3015        MatMultTransposeAdd_MPIAIJ,
3016 #ifdef PETSC_HAVE_PBGL
3017        MatSolve_MPIAIJ,
3018 #else
3019        0,
3020 #endif
3021        0,
3022        0,
3023 /*10*/ 0,
3024        0,
3025        0,
3026        MatSOR_MPIAIJ,
3027        MatTranspose_MPIAIJ,
3028 /*15*/ MatGetInfo_MPIAIJ,
3029        MatEqual_MPIAIJ,
3030        MatGetDiagonal_MPIAIJ,
3031        MatDiagonalScale_MPIAIJ,
3032        MatNorm_MPIAIJ,
3033 /*20*/ MatAssemblyBegin_MPIAIJ,
3034        MatAssemblyEnd_MPIAIJ,
3035        MatSetOption_MPIAIJ,
3036        MatZeroEntries_MPIAIJ,
3037 /*24*/ MatZeroRows_MPIAIJ,
3038        0,
3039 #ifdef PETSC_HAVE_PBGL
3040        0,
3041 #else
3042        0,
3043 #endif
3044        0,
3045        0,
3046 /*29*/ MatSetUp_MPIAIJ,
3047 #ifdef PETSC_HAVE_PBGL
3048        0,
3049 #else
3050        0,
3051 #endif
3052        0,
3053        0,
3054        0,
3055 /*34*/ MatDuplicate_MPIAIJ,
3056        0,
3057        0,
3058        0,
3059        0,
3060 /*39*/ MatAXPY_MPIAIJ,
3061        MatGetSubMatrices_MPIAIJ,
3062        MatIncreaseOverlap_MPIAIJ,
3063        MatGetValues_MPIAIJ,
3064        MatCopy_MPIAIJ,
3065 /*44*/ MatGetRowMax_MPIAIJ,
3066        MatScale_MPIAIJ,
3067        0,
3068        0,
3069        MatZeroRowsColumns_MPIAIJ,
3070 /*49*/ 0,
3071        0,
3072        0,
3073        0,
3074        0,
3075 /*54*/ MatFDColoringCreate_MPIAIJ,
3076        0,
3077        MatSetUnfactored_MPIAIJ,
3078        0, /* MatPermute_MPIAIJ, impl currently broken */
3079        0,
3080 /*59*/ MatGetSubMatrix_MPIAIJ,
3081        MatDestroy_MPIAIJ,
3082        MatView_MPIAIJ,
3083        0,
3084        0,
3085 /*64*/ 0,
3086        0,
3087        0,
3088        0,
3089        0,
3090 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3091        MatGetRowMinAbs_MPIAIJ,
3092        0,
3093        MatSetColoring_MPIAIJ,
3094 #if defined(PETSC_HAVE_ADIC)
3095        MatSetValuesAdic_MPIAIJ,
3096 #else
3097        0,
3098 #endif
3099        MatSetValuesAdifor_MPIAIJ,
3100 /*75*/ MatFDColoringApply_AIJ,
3101        0,
3102        0,
3103        0,
3104        MatFindZeroDiagonals_MPIAIJ,
3105 /*80*/ 0,
3106        0,
3107        0,
3108 /*83*/ MatLoad_MPIAIJ,
3109        0,
3110        0,
3111        0,
3112        0,
3113        0,
3114 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3115        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3116        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3117        MatPtAP_Basic,
3118        MatPtAPSymbolic_MPIAIJ,
3119 /*94*/ MatPtAPNumeric_MPIAIJ,
3120        0,
3121        0,
3122        0,
3123        0,
3124 /*99*/ 0,
3125        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3126        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3127        MatConjugate_MPIAIJ,
3128        0,
3129 /*104*/MatSetValuesRow_MPIAIJ,
3130        MatRealPart_MPIAIJ,
3131        MatImaginaryPart_MPIAIJ,
3132        0,
3133        0,
3134 /*109*/0,
3135        MatGetRedundantMatrix_MPIAIJ,
3136        MatGetRowMin_MPIAIJ,
3137        0,
3138        0,
3139 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3140        0,
3141        0,
3142        0,
3143        0,
3144 /*119*/0,
3145        0,
3146        0,
3147        0,
3148        MatGetMultiProcBlock_MPIAIJ,
3149 /*124*/MatFindNonzeroRows_MPIAIJ,
3150        MatGetColumnNorms_MPIAIJ,
3151        MatInvertBlockDiagonal_MPIAIJ,
3152        0,
3153        MatGetSubMatricesParallel_MPIAIJ,
3154 /*129*/0,
3155        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3156        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3157        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3158        0,
3159 /*134*/0,
3160        0,
3161        0,
3162        0,
3163        0
3164 };
3165 
3166 /* ----------------------------------------------------------------------------------------*/
3167 
3168 EXTERN_C_BEGIN
3169 #undef __FUNCT__
3170 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3171 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3172 {
3173   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3174   PetscErrorCode ierr;
3175 
3176   PetscFunctionBegin;
3177   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3178   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3179   PetscFunctionReturn(0);
3180 }
3181 EXTERN_C_END
3182 
3183 EXTERN_C_BEGIN
3184 #undef __FUNCT__
3185 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3186 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3187 {
3188   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3189   PetscErrorCode ierr;
3190 
3191   PetscFunctionBegin;
3192   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3193   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3194   PetscFunctionReturn(0);
3195 }
3196 EXTERN_C_END
3197 
3198 EXTERN_C_BEGIN
3199 #undef __FUNCT__
3200 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3201 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3202 {
3203   Mat_MPIAIJ     *b;
3204   PetscErrorCode ierr;
3205   PetscInt       i;
3206   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3207 
3208   PetscFunctionBegin;
3209   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3210   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3211   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3212   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3213   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3214   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3215 
3216   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3217   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3218   if (d_nnz) {
3219     for (i=0; i<B->rmap->n; i++) {
3220       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3221     }
3222   }
3223   if (o_nnz) {
3224     for (i=0; i<B->rmap->n; i++) {
3225       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3226     }
3227   }
3228   b = (Mat_MPIAIJ*)B->data;
3229 
3230   if (!B->preallocated) {
3231     /* Explicitly create 2 MATSEQAIJ matrices. */
3232     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3233     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3234     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3235     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3236     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3237     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3238     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3239     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3240     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3241     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3242   }
3243 
3244   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3245   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3246   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3247   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3248   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3249   B->preallocated = PETSC_TRUE;
3250   PetscFunctionReturn(0);
3251 }
3252 EXTERN_C_END
3253 
3254 #undef __FUNCT__
3255 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3256 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3257 {
3258   Mat            mat;
3259   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3260   PetscErrorCode ierr;
3261 
3262   PetscFunctionBegin;
3263   *newmat       = 0;
3264   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3265   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3266   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3267   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3268   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3269   a    = (Mat_MPIAIJ*)mat->data;
3270 
3271   mat->factortype    = matin->factortype;
3272   mat->rmap->bs      = matin->rmap->bs;
3273   mat->cmap->bs      = matin->cmap->bs;
3274   mat->assembled    = PETSC_TRUE;
3275   mat->insertmode   = NOT_SET_VALUES;
3276   mat->preallocated = PETSC_TRUE;
3277 
3278   a->size           = oldmat->size;
3279   a->rank           = oldmat->rank;
3280   a->donotstash     = oldmat->donotstash;
3281   a->roworiented    = oldmat->roworiented;
3282   a->rowindices     = 0;
3283   a->rowvalues      = 0;
3284   a->getrowactive   = PETSC_FALSE;
3285 
3286   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3287   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3288 
3289   if (oldmat->colmap) {
3290 #if defined (PETSC_USE_CTABLE)
3291     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3292 #else
3293     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3294     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3295     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3296 #endif
3297   } else a->colmap = 0;
3298   if (oldmat->garray) {
3299     PetscInt len;
3300     len  = oldmat->B->cmap->n;
3301     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3302     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3303     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3304   } else a->garray = 0;
3305 
3306   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3307   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3308   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3309   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3310   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3311   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3312   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3313   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3314   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3315   *newmat = mat;
3316   PetscFunctionReturn(0);
3317 }
3318 
3319 
3320 
3321 #undef __FUNCT__
3322 #define __FUNCT__ "MatLoad_MPIAIJ"
3323 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3324 {
3325   PetscScalar    *vals,*svals;
3326   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3327   PetscErrorCode ierr;
3328   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3329   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3330   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3331   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3332   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3333   int            fd;
3334 
3335   PetscFunctionBegin;
3336   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3337   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3338   if (!rank) {
3339     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3340     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3341     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3342   }
3343 
3344   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3345 
3346   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3347   M = header[1]; N = header[2];
3348   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3349   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3350   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3351 
3352   /* If global sizes are set, check if they are consistent with that given in the file */
3353   if (sizesset) {
3354     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3355   }
3356   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3357   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3358 
3359   /* determine ownership of all rows */
3360   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3361   else m = newMat->rmap->n; /* Set by user */
3362 
3363   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3364   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3365 
3366   /* First process needs enough room for process with most rows */
3367   if (!rank) {
3368     mmax       = rowners[1];
3369     for (i=2; i<size; i++) {
3370       mmax = PetscMax(mmax,rowners[i]);
3371     }
3372   } else mmax = m;
3373 
3374   rowners[0] = 0;
3375   for (i=2; i<=size; i++) {
3376     rowners[i] += rowners[i-1];
3377   }
3378   rstart = rowners[rank];
3379   rend   = rowners[rank+1];
3380 
3381   /* distribute row lengths to all processors */
3382   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3383   if (!rank) {
3384     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3385     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3386     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3387     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3388     for (j=0; j<m; j++) {
3389       procsnz[0] += ourlens[j];
3390     }
3391     for (i=1; i<size; i++) {
3392       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3393       /* calculate the number of nonzeros on each processor */
3394       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3395         procsnz[i] += rowlengths[j];
3396       }
3397       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3398     }
3399     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3400   } else {
3401     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3402   }
3403 
3404   if (!rank) {
3405     /* determine max buffer needed and allocate it */
3406     maxnz = 0;
3407     for (i=0; i<size; i++) {
3408       maxnz = PetscMax(maxnz,procsnz[i]);
3409     }
3410     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3411 
3412     /* read in my part of the matrix column indices  */
3413     nz   = procsnz[0];
3414     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3415     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3416 
3417     /* read in every one elses and ship off */
3418     for (i=1; i<size; i++) {
3419       nz     = procsnz[i];
3420       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3421       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3422     }
3423     ierr = PetscFree(cols);CHKERRQ(ierr);
3424   } else {
3425     /* determine buffer space needed for message */
3426     nz = 0;
3427     for (i=0; i<m; i++) {
3428       nz += ourlens[i];
3429     }
3430     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3431 
3432     /* receive message of column indices*/
3433     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3434   }
3435 
3436   /* determine column ownership if matrix is not square */
3437   if (N != M) {
3438     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3439     else n = newMat->cmap->n;
3440     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3441     cstart = cend - n;
3442   } else {
3443     cstart = rstart;
3444     cend   = rend;
3445     n      = cend - cstart;
3446   }
3447 
3448   /* loop over local rows, determining number of off diagonal entries */
3449   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3450   jj = 0;
3451   for (i=0; i<m; i++) {
3452     for (j=0; j<ourlens[i]; j++) {
3453       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3454       jj++;
3455     }
3456   }
3457 
3458   for (i=0; i<m; i++) {
3459     ourlens[i] -= offlens[i];
3460   }
3461   if (!sizesset) {
3462     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3463   }
3464   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3465 
3466   for (i=0; i<m; i++) {
3467     ourlens[i] += offlens[i];
3468   }
3469 
3470   if (!rank) {
3471     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3472 
3473     /* read in my part of the matrix numerical values  */
3474     nz   = procsnz[0];
3475     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3476 
3477     /* insert into matrix */
3478     jj      = rstart;
3479     smycols = mycols;
3480     svals   = vals;
3481     for (i=0; i<m; i++) {
3482       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3483       smycols += ourlens[i];
3484       svals   += ourlens[i];
3485       jj++;
3486     }
3487 
3488     /* read in other processors and ship out */
3489     for (i=1; i<size; i++) {
3490       nz     = procsnz[i];
3491       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3492       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3493     }
3494     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3495   } else {
3496     /* receive numeric values */
3497     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3498 
3499     /* receive message of values*/
3500     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3501 
3502     /* insert into matrix */
3503     jj      = rstart;
3504     smycols = mycols;
3505     svals   = vals;
3506     for (i=0; i<m; i++) {
3507       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3508       smycols += ourlens[i];
3509       svals   += ourlens[i];
3510       jj++;
3511     }
3512   }
3513   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3514   ierr = PetscFree(vals);CHKERRQ(ierr);
3515   ierr = PetscFree(mycols);CHKERRQ(ierr);
3516   ierr = PetscFree(rowners);CHKERRQ(ierr);
3517 
3518   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3519   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3520   PetscFunctionReturn(0);
3521 }
3522 
3523 #undef __FUNCT__
3524 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3525 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3526 {
3527   PetscErrorCode ierr;
3528   IS             iscol_local;
3529   PetscInt       csize;
3530 
3531   PetscFunctionBegin;
3532   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3533   if (call == MAT_REUSE_MATRIX) {
3534     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3535     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3536   } else {
3537     PetscInt cbs;
3538     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3539     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3540     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3541   }
3542   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3543   if (call == MAT_INITIAL_MATRIX) {
3544     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3545     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3546   }
3547   PetscFunctionReturn(0);
3548 }
3549 
3550 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3551 #undef __FUNCT__
3552 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3553 /*
3554     Not great since it makes two copies of the submatrix, first an SeqAIJ
3555   in local and then by concatenating the local matrices the end result.
3556   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3557 
3558   Note: This requires a sequential iscol with all indices.
3559 */
3560 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3561 {
3562   PetscErrorCode ierr;
3563   PetscMPIInt    rank,size;
3564   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3565   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3566   PetscBool      allcolumns, colflag;
3567   Mat            M,Mreuse;
3568   MatScalar      *vwork,*aa;
3569   MPI_Comm       comm = ((PetscObject)mat)->comm;
3570   Mat_SeqAIJ     *aij;
3571 
3572 
3573   PetscFunctionBegin;
3574   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3575   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3576 
3577   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3578   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3579   if (colflag && ncol == mat->cmap->N){
3580     allcolumns = PETSC_TRUE;
3581   } else {
3582     allcolumns = PETSC_FALSE;
3583   }
3584   if (call ==  MAT_REUSE_MATRIX) {
3585     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3586     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3587     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3588   } else {
3589     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3590   }
3591 
3592   /*
3593       m - number of local rows
3594       n - number of columns (same on all processors)
3595       rstart - first row in new global matrix generated
3596   */
3597   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3598   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3599   if (call == MAT_INITIAL_MATRIX) {
3600     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3601     ii  = aij->i;
3602     jj  = aij->j;
3603 
3604     /*
3605         Determine the number of non-zeros in the diagonal and off-diagonal
3606         portions of the matrix in order to do correct preallocation
3607     */
3608 
3609     /* first get start and end of "diagonal" columns */
3610     if (csize == PETSC_DECIDE) {
3611       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3612       if (mglobal == n) { /* square matrix */
3613 	nlocal = m;
3614       } else {
3615         nlocal = n/size + ((n % size) > rank);
3616       }
3617     } else {
3618       nlocal = csize;
3619     }
3620     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3621     rstart = rend - nlocal;
3622     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3623 
3624     /* next, compute all the lengths */
3625     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3626     olens = dlens + m;
3627     for (i=0; i<m; i++) {
3628       jend = ii[i+1] - ii[i];
3629       olen = 0;
3630       dlen = 0;
3631       for (j=0; j<jend; j++) {
3632         if (*jj < rstart || *jj >= rend) olen++;
3633         else dlen++;
3634         jj++;
3635       }
3636       olens[i] = olen;
3637       dlens[i] = dlen;
3638     }
3639     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3640     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3641     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3642     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3643     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3644     ierr = PetscFree(dlens);CHKERRQ(ierr);
3645   } else {
3646     PetscInt ml,nl;
3647 
3648     M = *newmat;
3649     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3650     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3651     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3652     /*
3653          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3654        rather than the slower MatSetValues().
3655     */
3656     M->was_assembled = PETSC_TRUE;
3657     M->assembled     = PETSC_FALSE;
3658   }
3659   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3660   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3661   ii  = aij->i;
3662   jj  = aij->j;
3663   aa  = aij->a;
3664   for (i=0; i<m; i++) {
3665     row   = rstart + i;
3666     nz    = ii[i+1] - ii[i];
3667     cwork = jj;     jj += nz;
3668     vwork = aa;     aa += nz;
3669     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3670   }
3671 
3672   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3673   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3674   *newmat = M;
3675 
3676   /* save submatrix used in processor for next request */
3677   if (call ==  MAT_INITIAL_MATRIX) {
3678     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3679     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3680   }
3681 
3682   PetscFunctionReturn(0);
3683 }
3684 
3685 EXTERN_C_BEGIN
3686 #undef __FUNCT__
3687 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3688 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3689 {
3690   PetscInt       m,cstart, cend,j,nnz,i,d;
3691   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3692   const PetscInt *JJ;
3693   PetscScalar    *values;
3694   PetscErrorCode ierr;
3695 
3696   PetscFunctionBegin;
3697   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3698 
3699   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3700   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3701   m      = B->rmap->n;
3702   cstart = B->cmap->rstart;
3703   cend   = B->cmap->rend;
3704   rstart = B->rmap->rstart;
3705 
3706   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3707 
3708 #if defined(PETSC_USE_DEBUGGING)
3709   for (i=0; i<m; i++) {
3710     nnz     = Ii[i+1]- Ii[i];
3711     JJ      = J + Ii[i];
3712     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3713     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3714     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3715   }
3716 #endif
3717 
3718   for (i=0; i<m; i++) {
3719     nnz     = Ii[i+1]- Ii[i];
3720     JJ      = J + Ii[i];
3721     nnz_max = PetscMax(nnz_max,nnz);
3722     d       = 0;
3723     for (j=0; j<nnz; j++) {
3724       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3725     }
3726     d_nnz[i] = d;
3727     o_nnz[i] = nnz - d;
3728   }
3729   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3730   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3731 
3732   if (v) values = (PetscScalar*)v;
3733   else {
3734     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3735     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3736   }
3737 
3738   for (i=0; i<m; i++) {
3739     ii   = i + rstart;
3740     nnz  = Ii[i+1]- Ii[i];
3741     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3742   }
3743   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3744   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3745 
3746   if (!v) {
3747     ierr = PetscFree(values);CHKERRQ(ierr);
3748   }
3749   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3750   PetscFunctionReturn(0);
3751 }
3752 EXTERN_C_END
3753 
3754 #undef __FUNCT__
3755 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3756 /*@
3757    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3758    (the default parallel PETSc format).
3759 
3760    Collective on MPI_Comm
3761 
3762    Input Parameters:
3763 +  B - the matrix
3764 .  i - the indices into j for the start of each local row (starts with zero)
3765 .  j - the column indices for each local row (starts with zero)
3766 -  v - optional values in the matrix
3767 
3768    Level: developer
3769 
3770    Notes:
3771        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3772      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3773      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3774 
3775        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3776 
3777        The format which is used for the sparse matrix input, is equivalent to a
3778     row-major ordering.. i.e for the following matrix, the input data expected is
3779     as shown:
3780 
3781         1 0 0
3782         2 0 3     P0
3783        -------
3784         4 5 6     P1
3785 
3786      Process0 [P0]: rows_owned=[0,1]
3787         i =  {0,1,3}  [size = nrow+1  = 2+1]
3788         j =  {0,0,2}  [size = nz = 6]
3789         v =  {1,2,3}  [size = nz = 6]
3790 
3791      Process1 [P1]: rows_owned=[2]
3792         i =  {0,3}    [size = nrow+1  = 1+1]
3793         j =  {0,1,2}  [size = nz = 6]
3794         v =  {4,5,6}  [size = nz = 6]
3795 
3796 .keywords: matrix, aij, compressed row, sparse, parallel
3797 
3798 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3799           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3800 @*/
3801 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3802 {
3803   PetscErrorCode ierr;
3804 
3805   PetscFunctionBegin;
3806   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3807   PetscFunctionReturn(0);
3808 }
3809 
3810 #undef __FUNCT__
3811 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3812 /*@C
3813    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3814    (the default parallel PETSc format).  For good matrix assembly performance
3815    the user should preallocate the matrix storage by setting the parameters
3816    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3817    performance can be increased by more than a factor of 50.
3818 
3819    Collective on MPI_Comm
3820 
3821    Input Parameters:
3822 +  A - the matrix
3823 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3824            (same value is used for all local rows)
3825 .  d_nnz - array containing the number of nonzeros in the various rows of the
3826            DIAGONAL portion of the local submatrix (possibly different for each row)
3827            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3828            The size of this array is equal to the number of local rows, i.e 'm'.
3829            For matrices that will be factored, you must leave room for (and set)
3830            the diagonal entry even if it is zero.
3831 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3832            submatrix (same value is used for all local rows).
3833 -  o_nnz - array containing the number of nonzeros in the various rows of the
3834            OFF-DIAGONAL portion of the local submatrix (possibly different for
3835            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3836            structure. The size of this array is equal to the number
3837            of local rows, i.e 'm'.
3838 
3839    If the *_nnz parameter is given then the *_nz parameter is ignored
3840 
3841    The AIJ format (also called the Yale sparse matrix format or
3842    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3843    storage.  The stored row and column indices begin with zero.
3844    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3845 
3846    The parallel matrix is partitioned such that the first m0 rows belong to
3847    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3848    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3849 
3850    The DIAGONAL portion of the local submatrix of a processor can be defined
3851    as the submatrix which is obtained by extraction the part corresponding to
3852    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3853    first row that belongs to the processor, r2 is the last row belonging to
3854    the this processor, and c1-c2 is range of indices of the local part of a
3855    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3856    common case of a square matrix, the row and column ranges are the same and
3857    the DIAGONAL part is also square. The remaining portion of the local
3858    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3859 
3860    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3861 
3862    You can call MatGetInfo() to get information on how effective the preallocation was;
3863    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3864    You can also run with the option -info and look for messages with the string
3865    malloc in them to see if additional memory allocation was needed.
3866 
3867    Example usage:
3868 
3869    Consider the following 8x8 matrix with 34 non-zero values, that is
3870    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3871    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3872    as follows:
3873 
3874 .vb
3875             1  2  0  |  0  3  0  |  0  4
3876     Proc0   0  5  6  |  7  0  0  |  8  0
3877             9  0 10  | 11  0  0  | 12  0
3878     -------------------------------------
3879            13  0 14  | 15 16 17  |  0  0
3880     Proc1   0 18  0  | 19 20 21  |  0  0
3881             0  0  0  | 22 23  0  | 24  0
3882     -------------------------------------
3883     Proc2  25 26 27  |  0  0 28  | 29  0
3884            30  0  0  | 31 32 33  |  0 34
3885 .ve
3886 
3887    This can be represented as a collection of submatrices as:
3888 
3889 .vb
3890       A B C
3891       D E F
3892       G H I
3893 .ve
3894 
3895    Where the submatrices A,B,C are owned by proc0, D,E,F are
3896    owned by proc1, G,H,I are owned by proc2.
3897 
3898    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3899    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3900    The 'M','N' parameters are 8,8, and have the same values on all procs.
3901 
3902    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3903    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3904    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3905    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3906    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3907    matrix, ans [DF] as another SeqAIJ matrix.
3908 
3909    When d_nz, o_nz parameters are specified, d_nz storage elements are
3910    allocated for every row of the local diagonal submatrix, and o_nz
3911    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3912    One way to choose d_nz and o_nz is to use the max nonzerors per local
3913    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3914    In this case, the values of d_nz,o_nz are:
3915 .vb
3916      proc0 : dnz = 2, o_nz = 2
3917      proc1 : dnz = 3, o_nz = 2
3918      proc2 : dnz = 1, o_nz = 4
3919 .ve
3920    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3921    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3922    for proc3. i.e we are using 12+15+10=37 storage locations to store
3923    34 values.
3924 
3925    When d_nnz, o_nnz parameters are specified, the storage is specified
3926    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3927    In the above case the values for d_nnz,o_nnz are:
3928 .vb
3929      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3930      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3931      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3932 .ve
3933    Here the space allocated is sum of all the above values i.e 34, and
3934    hence pre-allocation is perfect.
3935 
3936    Level: intermediate
3937 
3938 .keywords: matrix, aij, compressed row, sparse, parallel
3939 
3940 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3941           MPIAIJ, MatGetInfo()
3942 @*/
3943 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3944 {
3945   PetscErrorCode ierr;
3946 
3947   PetscFunctionBegin;
3948   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3949   PetscValidType(B,1);
3950   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3951   PetscFunctionReturn(0);
3952 }
3953 
3954 #undef __FUNCT__
3955 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3956 /*@
3957      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3958          CSR format the local rows.
3959 
3960    Collective on MPI_Comm
3961 
3962    Input Parameters:
3963 +  comm - MPI communicator
3964 .  m - number of local rows (Cannot be PETSC_DECIDE)
3965 .  n - This value should be the same as the local size used in creating the
3966        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3967        calculated if N is given) For square matrices n is almost always m.
3968 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3969 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3970 .   i - row indices
3971 .   j - column indices
3972 -   a - matrix values
3973 
3974    Output Parameter:
3975 .   mat - the matrix
3976 
3977    Level: intermediate
3978 
3979    Notes:
3980        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3981      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3982      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3983 
3984        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3985 
3986        The format which is used for the sparse matrix input, is equivalent to a
3987     row-major ordering.. i.e for the following matrix, the input data expected is
3988     as shown:
3989 
3990         1 0 0
3991         2 0 3     P0
3992        -------
3993         4 5 6     P1
3994 
3995      Process0 [P0]: rows_owned=[0,1]
3996         i =  {0,1,3}  [size = nrow+1  = 2+1]
3997         j =  {0,0,2}  [size = nz = 6]
3998         v =  {1,2,3}  [size = nz = 6]
3999 
4000      Process1 [P1]: rows_owned=[2]
4001         i =  {0,3}    [size = nrow+1  = 1+1]
4002         j =  {0,1,2}  [size = nz = 6]
4003         v =  {4,5,6}  [size = nz = 6]
4004 
4005 .keywords: matrix, aij, compressed row, sparse, parallel
4006 
4007 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4008           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4009 @*/
4010 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4011 {
4012   PetscErrorCode ierr;
4013 
4014   PetscFunctionBegin;
4015   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4016   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4017   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4018   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4019   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4020   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4021   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4022   PetscFunctionReturn(0);
4023 }
4024 
4025 #undef __FUNCT__
4026 #define __FUNCT__ "MatCreateAIJ"
4027 /*@C
4028    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4029    (the default parallel PETSc format).  For good matrix assembly performance
4030    the user should preallocate the matrix storage by setting the parameters
4031    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4032    performance can be increased by more than a factor of 50.
4033 
4034    Collective on MPI_Comm
4035 
4036    Input Parameters:
4037 +  comm - MPI communicator
4038 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4039            This value should be the same as the local size used in creating the
4040            y vector for the matrix-vector product y = Ax.
4041 .  n - This value should be the same as the local size used in creating the
4042        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4043        calculated if N is given) For square matrices n is almost always m.
4044 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4045 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4046 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4047            (same value is used for all local rows)
4048 .  d_nnz - array containing the number of nonzeros in the various rows of the
4049            DIAGONAL portion of the local submatrix (possibly different for each row)
4050            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4051            The size of this array is equal to the number of local rows, i.e 'm'.
4052 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4053            submatrix (same value is used for all local rows).
4054 -  o_nnz - array containing the number of nonzeros in the various rows of the
4055            OFF-DIAGONAL portion of the local submatrix (possibly different for
4056            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4057            structure. The size of this array is equal to the number
4058            of local rows, i.e 'm'.
4059 
4060    Output Parameter:
4061 .  A - the matrix
4062 
4063    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4064    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4065    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4066 
4067    Notes:
4068    If the *_nnz parameter is given then the *_nz parameter is ignored
4069 
4070    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4071    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4072    storage requirements for this matrix.
4073 
4074    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4075    processor than it must be used on all processors that share the object for
4076    that argument.
4077 
4078    The user MUST specify either the local or global matrix dimensions
4079    (possibly both).
4080 
4081    The parallel matrix is partitioned across processors such that the
4082    first m0 rows belong to process 0, the next m1 rows belong to
4083    process 1, the next m2 rows belong to process 2 etc.. where
4084    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4085    values corresponding to [m x N] submatrix.
4086 
4087    The columns are logically partitioned with the n0 columns belonging
4088    to 0th partition, the next n1 columns belonging to the next
4089    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4090 
4091    The DIAGONAL portion of the local submatrix on any given processor
4092    is the submatrix corresponding to the rows and columns m,n
4093    corresponding to the given processor. i.e diagonal matrix on
4094    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4095    etc. The remaining portion of the local submatrix [m x (N-n)]
4096    constitute the OFF-DIAGONAL portion. The example below better
4097    illustrates this concept.
4098 
4099    For a square global matrix we define each processor's diagonal portion
4100    to be its local rows and the corresponding columns (a square submatrix);
4101    each processor's off-diagonal portion encompasses the remainder of the
4102    local matrix (a rectangular submatrix).
4103 
4104    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4105 
4106    When calling this routine with a single process communicator, a matrix of
4107    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4108    type of communicator, use the construction mechanism:
4109      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4110 
4111    By default, this format uses inodes (identical nodes) when possible.
4112    We search for consecutive rows with the same nonzero structure, thereby
4113    reusing matrix information to achieve increased efficiency.
4114 
4115    Options Database Keys:
4116 +  -mat_no_inode  - Do not use inodes
4117 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4118 -  -mat_aij_oneindex - Internally use indexing starting at 1
4119         rather than 0.  Note that when calling MatSetValues(),
4120         the user still MUST index entries starting at 0!
4121 
4122 
4123    Example usage:
4124 
4125    Consider the following 8x8 matrix with 34 non-zero values, that is
4126    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4127    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4128    as follows:
4129 
4130 .vb
4131             1  2  0  |  0  3  0  |  0  4
4132     Proc0   0  5  6  |  7  0  0  |  8  0
4133             9  0 10  | 11  0  0  | 12  0
4134     -------------------------------------
4135            13  0 14  | 15 16 17  |  0  0
4136     Proc1   0 18  0  | 19 20 21  |  0  0
4137             0  0  0  | 22 23  0  | 24  0
4138     -------------------------------------
4139     Proc2  25 26 27  |  0  0 28  | 29  0
4140            30  0  0  | 31 32 33  |  0 34
4141 .ve
4142 
4143    This can be represented as a collection of submatrices as:
4144 
4145 .vb
4146       A B C
4147       D E F
4148       G H I
4149 .ve
4150 
4151    Where the submatrices A,B,C are owned by proc0, D,E,F are
4152    owned by proc1, G,H,I are owned by proc2.
4153 
4154    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4155    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4156    The 'M','N' parameters are 8,8, and have the same values on all procs.
4157 
4158    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4159    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4160    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4161    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4162    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4163    matrix, ans [DF] as another SeqAIJ matrix.
4164 
4165    When d_nz, o_nz parameters are specified, d_nz storage elements are
4166    allocated for every row of the local diagonal submatrix, and o_nz
4167    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4168    One way to choose d_nz and o_nz is to use the max nonzerors per local
4169    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4170    In this case, the values of d_nz,o_nz are:
4171 .vb
4172      proc0 : dnz = 2, o_nz = 2
4173      proc1 : dnz = 3, o_nz = 2
4174      proc2 : dnz = 1, o_nz = 4
4175 .ve
4176    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4177    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4178    for proc3. i.e we are using 12+15+10=37 storage locations to store
4179    34 values.
4180 
4181    When d_nnz, o_nnz parameters are specified, the storage is specified
4182    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4183    In the above case the values for d_nnz,o_nnz are:
4184 .vb
4185      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4186      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4187      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4188 .ve
4189    Here the space allocated is sum of all the above values i.e 34, and
4190    hence pre-allocation is perfect.
4191 
4192    Level: intermediate
4193 
4194 .keywords: matrix, aij, compressed row, sparse, parallel
4195 
4196 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4197           MPIAIJ, MatCreateMPIAIJWithArrays()
4198 @*/
4199 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4200 {
4201   PetscErrorCode ierr;
4202   PetscMPIInt    size;
4203 
4204   PetscFunctionBegin;
4205   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4206   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4207   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4208   if (size > 1) {
4209     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4210     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4211   } else {
4212     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4213     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4214   }
4215   PetscFunctionReturn(0);
4216 }
4217 
4218 #undef __FUNCT__
4219 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4220 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4221 {
4222   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4223 
4224   PetscFunctionBegin;
4225   *Ad     = a->A;
4226   *Ao     = a->B;
4227   *colmap = a->garray;
4228   PetscFunctionReturn(0);
4229 }
4230 
4231 #undef __FUNCT__
4232 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4233 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4234 {
4235   PetscErrorCode ierr;
4236   PetscInt       i;
4237   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4238 
4239   PetscFunctionBegin;
4240   if (coloring->ctype == IS_COLORING_GLOBAL) {
4241     ISColoringValue *allcolors,*colors;
4242     ISColoring      ocoloring;
4243 
4244     /* set coloring for diagonal portion */
4245     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4246 
4247     /* set coloring for off-diagonal portion */
4248     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4249     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4250     for (i=0; i<a->B->cmap->n; i++) {
4251       colors[i] = allcolors[a->garray[i]];
4252     }
4253     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4254     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4255     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4256     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4257   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4258     ISColoringValue *colors;
4259     PetscInt        *larray;
4260     ISColoring      ocoloring;
4261 
4262     /* set coloring for diagonal portion */
4263     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4264     for (i=0; i<a->A->cmap->n; i++) {
4265       larray[i] = i + A->cmap->rstart;
4266     }
4267     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4268     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4269     for (i=0; i<a->A->cmap->n; i++) {
4270       colors[i] = coloring->colors[larray[i]];
4271     }
4272     ierr = PetscFree(larray);CHKERRQ(ierr);
4273     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4274     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4275     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4276 
4277     /* set coloring for off-diagonal portion */
4278     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4279     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4280     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4281     for (i=0; i<a->B->cmap->n; i++) {
4282       colors[i] = coloring->colors[larray[i]];
4283     }
4284     ierr = PetscFree(larray);CHKERRQ(ierr);
4285     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4286     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4287     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4288   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4289 
4290   PetscFunctionReturn(0);
4291 }
4292 
4293 #if defined(PETSC_HAVE_ADIC)
4294 #undef __FUNCT__
4295 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4296 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4297 {
4298   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4299   PetscErrorCode ierr;
4300 
4301   PetscFunctionBegin;
4302   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4303   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4304   PetscFunctionReturn(0);
4305 }
4306 #endif
4307 
4308 #undef __FUNCT__
4309 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4310 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4311 {
4312   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4313   PetscErrorCode ierr;
4314 
4315   PetscFunctionBegin;
4316   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4317   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4318   PetscFunctionReturn(0);
4319 }
4320 
4321 #undef __FUNCT__
4322 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4323 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4324 {
4325   PetscErrorCode ierr;
4326   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4327   PetscInt       *indx;
4328 
4329   PetscFunctionBegin;
4330   /* This routine will ONLY return MPIAIJ type matrix */
4331   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4332   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4333   if (n == PETSC_DECIDE){
4334     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4335   }
4336   /* Check sum(n) = N */
4337   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4338   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4339 
4340   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4341   rstart -= m;
4342 
4343   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4344   for (i=0;i<m;i++) {
4345     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4346     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4347     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4348   }
4349 
4350   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4351   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4352   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4353   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4354   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4355   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4356   PetscFunctionReturn(0);
4357 }
4358 
4359 #undef __FUNCT__
4360 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4361 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4362 {
4363   PetscErrorCode ierr;
4364   PetscInt       m,N,i,rstart,nnz,Ii;
4365   PetscInt       *indx;
4366   PetscScalar    *values;
4367 
4368   PetscFunctionBegin;
4369   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4370   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4371   for (i=0;i<m;i++) {
4372     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4373     Ii    = i + rstart;
4374     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4375     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4376   }
4377   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4378   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4379   PetscFunctionReturn(0);
4380 }
4381 
4382 #undef __FUNCT__
4383 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4384 /*@
4385       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4386                  matrices from each processor
4387 
4388     Collective on MPI_Comm
4389 
4390    Input Parameters:
4391 +    comm - the communicators the parallel matrix will live on
4392 .    inmat - the input sequential matrices
4393 .    n - number of local columns (or PETSC_DECIDE)
4394 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4395 
4396    Output Parameter:
4397 .    outmat - the parallel matrix generated
4398 
4399     Level: advanced
4400 
4401    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4402 
4403 @*/
4404 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4405 {
4406   PetscErrorCode ierr;
4407 
4408   PetscFunctionBegin;
4409   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4410   if (scall == MAT_INITIAL_MATRIX){
4411     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4412   }
4413   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4414   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4415   PetscFunctionReturn(0);
4416 }
4417 
4418 #undef __FUNCT__
4419 #define __FUNCT__ "MatFileSplit"
4420 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4421 {
4422   PetscErrorCode    ierr;
4423   PetscMPIInt       rank;
4424   PetscInt          m,N,i,rstart,nnz;
4425   size_t            len;
4426   const PetscInt    *indx;
4427   PetscViewer       out;
4428   char              *name;
4429   Mat               B;
4430   const PetscScalar *values;
4431 
4432   PetscFunctionBegin;
4433   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4434   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4435   /* Should this be the type of the diagonal block of A? */
4436   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4437   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4438   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4439   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4440   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4441   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4442   for (i=0;i<m;i++) {
4443     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4444     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4445     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4446   }
4447   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4448   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4449 
4450   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4451   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4452   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4453   sprintf(name,"%s.%d",outfile,rank);
4454   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4455   ierr = PetscFree(name);
4456   ierr = MatView(B,out);CHKERRQ(ierr);
4457   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4458   ierr = MatDestroy(&B);CHKERRQ(ierr);
4459   PetscFunctionReturn(0);
4460 }
4461 
4462 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4463 #undef __FUNCT__
4464 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4465 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4466 {
4467   PetscErrorCode       ierr;
4468   Mat_Merge_SeqsToMPI  *merge;
4469   PetscContainer       container;
4470 
4471   PetscFunctionBegin;
4472   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4473   if (container) {
4474     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4475     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4476     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4477     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4478     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4479     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4480     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4481     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4482     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4483     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4484     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4485     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4486     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4487     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4488     ierr = PetscFree(merge);CHKERRQ(ierr);
4489     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4490   }
4491   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4492   PetscFunctionReturn(0);
4493 }
4494 
4495 #include <../src/mat/utils/freespace.h>
4496 #include <petscbt.h>
4497 
4498 #undef __FUNCT__
4499 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4500 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4501 {
4502   PetscErrorCode       ierr;
4503   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4504   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4505   PetscMPIInt          size,rank,taga,*len_s;
4506   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4507   PetscInt             proc,m;
4508   PetscInt             **buf_ri,**buf_rj;
4509   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4510   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4511   MPI_Request          *s_waits,*r_waits;
4512   MPI_Status           *status;
4513   MatScalar            *aa=a->a;
4514   MatScalar            **abuf_r,*ba_i;
4515   Mat_Merge_SeqsToMPI  *merge;
4516   PetscContainer       container;
4517 
4518   PetscFunctionBegin;
4519   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4520 
4521   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4522   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4523 
4524   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4525   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4526 
4527   bi     = merge->bi;
4528   bj     = merge->bj;
4529   buf_ri = merge->buf_ri;
4530   buf_rj = merge->buf_rj;
4531 
4532   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4533   owners = merge->rowmap->range;
4534   len_s  = merge->len_s;
4535 
4536   /* send and recv matrix values */
4537   /*-----------------------------*/
4538   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4539   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4540 
4541   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4542   for (proc=0,k=0; proc<size; proc++){
4543     if (!len_s[proc]) continue;
4544     i = owners[proc];
4545     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4546     k++;
4547   }
4548 
4549   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4550   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4551   ierr = PetscFree(status);CHKERRQ(ierr);
4552 
4553   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4554   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4555 
4556   /* insert mat values of mpimat */
4557   /*----------------------------*/
4558   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4559   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4560 
4561   for (k=0; k<merge->nrecv; k++){
4562     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4563     nrows = *(buf_ri_k[k]);
4564     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4565     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4566   }
4567 
4568   /* set values of ba */
4569   m = merge->rowmap->n;
4570   for (i=0; i<m; i++) {
4571     arow = owners[rank] + i;
4572     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4573     bnzi = bi[i+1] - bi[i];
4574     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4575 
4576     /* add local non-zero vals of this proc's seqmat into ba */
4577     anzi = ai[arow+1] - ai[arow];
4578     aj   = a->j + ai[arow];
4579     aa   = a->a + ai[arow];
4580     nextaj = 0;
4581     for (j=0; nextaj<anzi; j++){
4582       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4583         ba_i[j] += aa[nextaj++];
4584       }
4585     }
4586 
4587     /* add received vals into ba */
4588     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4589       /* i-th row */
4590       if (i == *nextrow[k]) {
4591         anzi = *(nextai[k]+1) - *nextai[k];
4592         aj   = buf_rj[k] + *(nextai[k]);
4593         aa   = abuf_r[k] + *(nextai[k]);
4594         nextaj = 0;
4595         for (j=0; nextaj<anzi; j++){
4596           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4597             ba_i[j] += aa[nextaj++];
4598           }
4599         }
4600         nextrow[k]++; nextai[k]++;
4601       }
4602     }
4603     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4604   }
4605   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4606   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4607 
4608   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4609   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4610   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4611   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4612   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4613   PetscFunctionReturn(0);
4614 }
4615 
4616 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4617 
4618 #undef __FUNCT__
4619 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4620 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4621 {
4622   PetscErrorCode       ierr;
4623   Mat                  B_mpi;
4624   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4625   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4626   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4627   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4628   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4629   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4630   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4631   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4632   MPI_Status           *status;
4633   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4634   PetscBT              lnkbt;
4635   Mat_Merge_SeqsToMPI  *merge;
4636   PetscContainer       container;
4637 
4638   PetscFunctionBegin;
4639   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4640 
4641   /* make sure it is a PETSc comm */
4642   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4643   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4644   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4645 
4646   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4647   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4648 
4649   /* determine row ownership */
4650   /*---------------------------------------------------------*/
4651   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4652   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4653   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4654   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4655   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4656   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4657   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4658 
4659   m      = merge->rowmap->n;
4660   M      = merge->rowmap->N;
4661   owners = merge->rowmap->range;
4662 
4663   /* determine the number of messages to send, their lengths */
4664   /*---------------------------------------------------------*/
4665   len_s  = merge->len_s;
4666 
4667   len = 0;  /* length of buf_si[] */
4668   merge->nsend = 0;
4669   for (proc=0; proc<size; proc++){
4670     len_si[proc] = 0;
4671     if (proc == rank){
4672       len_s[proc] = 0;
4673     } else {
4674       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4675       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4676     }
4677     if (len_s[proc]) {
4678       merge->nsend++;
4679       nrows = 0;
4680       for (i=owners[proc]; i<owners[proc+1]; i++){
4681         if (ai[i+1] > ai[i]) nrows++;
4682       }
4683       len_si[proc] = 2*(nrows+1);
4684       len += len_si[proc];
4685     }
4686   }
4687 
4688   /* determine the number and length of messages to receive for ij-structure */
4689   /*-------------------------------------------------------------------------*/
4690   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4691   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4692 
4693   /* post the Irecv of j-structure */
4694   /*-------------------------------*/
4695   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4696   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4697 
4698   /* post the Isend of j-structure */
4699   /*--------------------------------*/
4700   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4701 
4702   for (proc=0, k=0; proc<size; proc++){
4703     if (!len_s[proc]) continue;
4704     i = owners[proc];
4705     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4706     k++;
4707   }
4708 
4709   /* receives and sends of j-structure are complete */
4710   /*------------------------------------------------*/
4711   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4712   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4713 
4714   /* send and recv i-structure */
4715   /*---------------------------*/
4716   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4717   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4718 
4719   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4720   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4721   for (proc=0,k=0; proc<size; proc++){
4722     if (!len_s[proc]) continue;
4723     /* form outgoing message for i-structure:
4724          buf_si[0]:                 nrows to be sent
4725                [1:nrows]:           row index (global)
4726                [nrows+1:2*nrows+1]: i-structure index
4727     */
4728     /*-------------------------------------------*/
4729     nrows = len_si[proc]/2 - 1;
4730     buf_si_i    = buf_si + nrows+1;
4731     buf_si[0]   = nrows;
4732     buf_si_i[0] = 0;
4733     nrows = 0;
4734     for (i=owners[proc]; i<owners[proc+1]; i++){
4735       anzi = ai[i+1] - ai[i];
4736       if (anzi) {
4737         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4738         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4739         nrows++;
4740       }
4741     }
4742     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4743     k++;
4744     buf_si += len_si[proc];
4745   }
4746 
4747   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4748   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4749 
4750   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4751   for (i=0; i<merge->nrecv; i++){
4752     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4753   }
4754 
4755   ierr = PetscFree(len_si);CHKERRQ(ierr);
4756   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4757   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4758   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4759   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4760   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4761   ierr = PetscFree(status);CHKERRQ(ierr);
4762 
4763   /* compute a local seq matrix in each processor */
4764   /*----------------------------------------------*/
4765   /* allocate bi array and free space for accumulating nonzero column info */
4766   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4767   bi[0] = 0;
4768 
4769   /* create and initialize a linked list */
4770   nlnk = N+1;
4771   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4772 
4773   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4774   len = 0;
4775   len  = ai[owners[rank+1]] - ai[owners[rank]];
4776   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4777   current_space = free_space;
4778 
4779   /* determine symbolic info for each local row */
4780   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4781 
4782   for (k=0; k<merge->nrecv; k++){
4783     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4784     nrows = *buf_ri_k[k];
4785     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4786     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4787   }
4788 
4789   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4790   len = 0;
4791   for (i=0;i<m;i++) {
4792     bnzi   = 0;
4793     /* add local non-zero cols of this proc's seqmat into lnk */
4794     arow   = owners[rank] + i;
4795     anzi   = ai[arow+1] - ai[arow];
4796     aj     = a->j + ai[arow];
4797     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4798     bnzi += nlnk;
4799     /* add received col data into lnk */
4800     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4801       if (i == *nextrow[k]) { /* i-th row */
4802         anzi = *(nextai[k]+1) - *nextai[k];
4803         aj   = buf_rj[k] + *nextai[k];
4804         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4805         bnzi += nlnk;
4806         nextrow[k]++; nextai[k]++;
4807       }
4808     }
4809     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4810 
4811     /* if free space is not available, make more free space */
4812     if (current_space->local_remaining<bnzi) {
4813       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4814       nspacedouble++;
4815     }
4816     /* copy data into free space, then initialize lnk */
4817     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4818     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4819 
4820     current_space->array           += bnzi;
4821     current_space->local_used      += bnzi;
4822     current_space->local_remaining -= bnzi;
4823 
4824     bi[i+1] = bi[i] + bnzi;
4825   }
4826 
4827   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4828 
4829   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4830   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4831   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4832 
4833   /* create symbolic parallel matrix B_mpi */
4834   /*---------------------------------------*/
4835     ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4836   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4837   if (n==PETSC_DECIDE) {
4838     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4839   } else {
4840     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4841   }
4842   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4843   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4844   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4845   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4846   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4847 
4848   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4849   B_mpi->assembled     = PETSC_FALSE;
4850   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4851   merge->bi            = bi;
4852   merge->bj            = bj;
4853   merge->buf_ri        = buf_ri;
4854   merge->buf_rj        = buf_rj;
4855   merge->coi           = PETSC_NULL;
4856   merge->coj           = PETSC_NULL;
4857   merge->owners_co     = PETSC_NULL;
4858 
4859   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4860 
4861   /* attach the supporting struct to B_mpi for reuse */
4862   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4863   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4864   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4865   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4866   *mpimat = B_mpi;
4867 
4868   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4869   PetscFunctionReturn(0);
4870 }
4871 
4872 #undef __FUNCT__
4873 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4874 /*@C
4875       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4876                  matrices from each processor
4877 
4878     Collective on MPI_Comm
4879 
4880    Input Parameters:
4881 +    comm - the communicators the parallel matrix will live on
4882 .    seqmat - the input sequential matrices
4883 .    m - number of local rows (or PETSC_DECIDE)
4884 .    n - number of local columns (or PETSC_DECIDE)
4885 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4886 
4887    Output Parameter:
4888 .    mpimat - the parallel matrix generated
4889 
4890     Level: advanced
4891 
4892    Notes:
4893      The dimensions of the sequential matrix in each processor MUST be the same.
4894      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4895      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4896 @*/
4897 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4898 {
4899   PetscErrorCode   ierr;
4900   PetscMPIInt     size;
4901 
4902   PetscFunctionBegin;
4903   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4904   if (size == 1){
4905      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4906      if (scall == MAT_INITIAL_MATRIX){
4907        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4908      } else {
4909        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4910      }
4911      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4912      PetscFunctionReturn(0);
4913   }
4914   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4915   if (scall == MAT_INITIAL_MATRIX){
4916     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4917   }
4918   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4919   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4920   PetscFunctionReturn(0);
4921 }
4922 
4923 #undef __FUNCT__
4924 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4925 /*@
4926      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4927           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4928           with MatGetSize()
4929 
4930     Not Collective
4931 
4932    Input Parameters:
4933 +    A - the matrix
4934 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4935 
4936    Output Parameter:
4937 .    A_loc - the local sequential matrix generated
4938 
4939     Level: developer
4940 
4941 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4942 
4943 @*/
4944 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4945 {
4946   PetscErrorCode  ierr;
4947   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4948   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4949   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4950   MatScalar       *aa=a->a,*ba=b->a,*cam;
4951   PetscScalar     *ca;
4952   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4953   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4954   PetscBool       match;
4955 
4956   PetscFunctionBegin;
4957   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4958   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4959   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4960   if (scall == MAT_INITIAL_MATRIX){
4961     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4962     ci[0] = 0;
4963     for (i=0; i<am; i++){
4964       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4965     }
4966     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4967     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4968     k = 0;
4969     for (i=0; i<am; i++) {
4970       ncols_o = bi[i+1] - bi[i];
4971       ncols_d = ai[i+1] - ai[i];
4972       /* off-diagonal portion of A */
4973       for (jo=0; jo<ncols_o; jo++) {
4974         col = cmap[*bj];
4975         if (col >= cstart) break;
4976         cj[k]   = col; bj++;
4977         ca[k++] = *ba++;
4978       }
4979       /* diagonal portion of A */
4980       for (j=0; j<ncols_d; j++) {
4981         cj[k]   = cstart + *aj++;
4982         ca[k++] = *aa++;
4983       }
4984       /* off-diagonal portion of A */
4985       for (j=jo; j<ncols_o; j++) {
4986         cj[k]   = cmap[*bj++];
4987         ca[k++] = *ba++;
4988       }
4989     }
4990     /* put together the new matrix */
4991     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4992     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4993     /* Since these are PETSc arrays, change flags to free them as necessary. */
4994     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4995     mat->free_a  = PETSC_TRUE;
4996     mat->free_ij = PETSC_TRUE;
4997     mat->nonew   = 0;
4998   } else if (scall == MAT_REUSE_MATRIX){
4999     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5000     ci = mat->i; cj = mat->j; cam = mat->a;
5001     for (i=0; i<am; i++) {
5002       /* off-diagonal portion of A */
5003       ncols_o = bi[i+1] - bi[i];
5004       for (jo=0; jo<ncols_o; jo++) {
5005         col = cmap[*bj];
5006         if (col >= cstart) break;
5007         *cam++ = *ba++; bj++;
5008       }
5009       /* diagonal portion of A */
5010       ncols_d = ai[i+1] - ai[i];
5011       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5012       /* off-diagonal portion of A */
5013       for (j=jo; j<ncols_o; j++) {
5014         *cam++ = *ba++; bj++;
5015       }
5016     }
5017   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5018   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5019   PetscFunctionReturn(0);
5020 }
5021 
5022 #undef __FUNCT__
5023 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5024 /*@C
5025      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5026 
5027     Not Collective
5028 
5029    Input Parameters:
5030 +    A - the matrix
5031 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5032 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5033 
5034    Output Parameter:
5035 .    A_loc - the local sequential matrix generated
5036 
5037     Level: developer
5038 
5039 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5040 
5041 @*/
5042 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5043 {
5044   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5045   PetscErrorCode    ierr;
5046   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5047   IS                isrowa,iscola;
5048   Mat               *aloc;
5049   PetscBool       match;
5050 
5051   PetscFunctionBegin;
5052   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5053   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5054   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5055   if (!row){
5056     start = A->rmap->rstart; end = A->rmap->rend;
5057     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5058   } else {
5059     isrowa = *row;
5060   }
5061   if (!col){
5062     start = A->cmap->rstart;
5063     cmap  = a->garray;
5064     nzA   = a->A->cmap->n;
5065     nzB   = a->B->cmap->n;
5066     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5067     ncols = 0;
5068     for (i=0; i<nzB; i++) {
5069       if (cmap[i] < start) idx[ncols++] = cmap[i];
5070       else break;
5071     }
5072     imark = i;
5073     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5074     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5075     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5076   } else {
5077     iscola = *col;
5078   }
5079   if (scall != MAT_INITIAL_MATRIX){
5080     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5081     aloc[0] = *A_loc;
5082   }
5083   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5084   *A_loc = aloc[0];
5085   ierr = PetscFree(aloc);CHKERRQ(ierr);
5086   if (!row){
5087     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5088   }
5089   if (!col){
5090     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5091   }
5092   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5093   PetscFunctionReturn(0);
5094 }
5095 
5096 #undef __FUNCT__
5097 #define __FUNCT__ "MatGetBrowsOfAcols"
5098 /*@C
5099     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5100 
5101     Collective on Mat
5102 
5103    Input Parameters:
5104 +    A,B - the matrices in mpiaij format
5105 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5106 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5107 
5108    Output Parameter:
5109 +    rowb, colb - index sets of rows and columns of B to extract
5110 -    B_seq - the sequential matrix generated
5111 
5112     Level: developer
5113 
5114 @*/
5115 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5116 {
5117   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5118   PetscErrorCode    ierr;
5119   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5120   IS                isrowb,iscolb;
5121   Mat               *bseq=PETSC_NULL;
5122 
5123   PetscFunctionBegin;
5124   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5125     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5126   }
5127   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5128 
5129   if (scall == MAT_INITIAL_MATRIX){
5130     start = A->cmap->rstart;
5131     cmap  = a->garray;
5132     nzA   = a->A->cmap->n;
5133     nzB   = a->B->cmap->n;
5134     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5135     ncols = 0;
5136     for (i=0; i<nzB; i++) {  /* row < local row index */
5137       if (cmap[i] < start) idx[ncols++] = cmap[i];
5138       else break;
5139     }
5140     imark = i;
5141     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5142     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5143     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5144     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5145   } else {
5146     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5147     isrowb = *rowb; iscolb = *colb;
5148     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5149     bseq[0] = *B_seq;
5150   }
5151   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5152   *B_seq = bseq[0];
5153   ierr = PetscFree(bseq);CHKERRQ(ierr);
5154   if (!rowb){
5155     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5156   } else {
5157     *rowb = isrowb;
5158   }
5159   if (!colb){
5160     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5161   } else {
5162     *colb = iscolb;
5163   }
5164   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5165   PetscFunctionReturn(0);
5166 }
5167 
5168 #undef __FUNCT__
5169 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5170 /*
5171     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5172     of the OFF-DIAGONAL portion of local A
5173 
5174     Collective on Mat
5175 
5176    Input Parameters:
5177 +    A,B - the matrices in mpiaij format
5178 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5179 
5180    Output Parameter:
5181 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5182 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5183 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5184 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5185 
5186     Level: developer
5187 
5188 */
5189 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5190 {
5191   VecScatter_MPI_General *gen_to,*gen_from;
5192   PetscErrorCode         ierr;
5193   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5194   Mat_SeqAIJ             *b_oth;
5195   VecScatter             ctx=a->Mvctx;
5196   MPI_Comm               comm=((PetscObject)ctx)->comm;
5197   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5198   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5199   PetscScalar            *rvalues,*svalues;
5200   MatScalar              *b_otha,*bufa,*bufA;
5201   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5202   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5203   MPI_Status             *sstatus,rstatus;
5204   PetscMPIInt            jj;
5205   PetscInt               *cols,sbs,rbs;
5206   PetscScalar            *vals;
5207 
5208   PetscFunctionBegin;
5209   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5210     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5211   }
5212   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5213   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5214 
5215   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5216   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5217   rvalues  = gen_from->values; /* holds the length of receiving row */
5218   svalues  = gen_to->values;   /* holds the length of sending row */
5219   nrecvs   = gen_from->n;
5220   nsends   = gen_to->n;
5221 
5222   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5223   srow     = gen_to->indices;   /* local row index to be sent */
5224   sstarts  = gen_to->starts;
5225   sprocs   = gen_to->procs;
5226   sstatus  = gen_to->sstatus;
5227   sbs      = gen_to->bs;
5228   rstarts  = gen_from->starts;
5229   rprocs   = gen_from->procs;
5230   rbs      = gen_from->bs;
5231 
5232   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5233   if (scall == MAT_INITIAL_MATRIX){
5234     /* i-array */
5235     /*---------*/
5236     /*  post receives */
5237     for (i=0; i<nrecvs; i++){
5238       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5239       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5240       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5241     }
5242 
5243     /* pack the outgoing message */
5244     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5245     sstartsj[0] = 0;  rstartsj[0] = 0;
5246     len = 0; /* total length of j or a array to be sent */
5247     k = 0;
5248     for (i=0; i<nsends; i++){
5249       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5250       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5251       for (j=0; j<nrows; j++) {
5252         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5253         for (l=0; l<sbs; l++){
5254           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5255           rowlen[j*sbs+l] = ncols;
5256           len += ncols;
5257           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5258         }
5259         k++;
5260       }
5261       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5262       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5263     }
5264     /* recvs and sends of i-array are completed */
5265     i = nrecvs;
5266     while (i--) {
5267       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5268     }
5269     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5270 
5271     /* allocate buffers for sending j and a arrays */
5272     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5273     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5274 
5275     /* create i-array of B_oth */
5276     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5277     b_othi[0] = 0;
5278     len = 0; /* total length of j or a array to be received */
5279     k = 0;
5280     for (i=0; i<nrecvs; i++){
5281       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5282       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5283       for (j=0; j<nrows; j++) {
5284         b_othi[k+1] = b_othi[k] + rowlen[j];
5285         len += rowlen[j]; k++;
5286       }
5287       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5288     }
5289 
5290     /* allocate space for j and a arrrays of B_oth */
5291     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5292     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5293 
5294     /* j-array */
5295     /*---------*/
5296     /*  post receives of j-array */
5297     for (i=0; i<nrecvs; i++){
5298       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5299       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5300     }
5301 
5302     /* pack the outgoing message j-array */
5303     k = 0;
5304     for (i=0; i<nsends; i++){
5305       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5306       bufJ = bufj+sstartsj[i];
5307       for (j=0; j<nrows; j++) {
5308         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5309         for (ll=0; ll<sbs; ll++){
5310           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5311           for (l=0; l<ncols; l++){
5312             *bufJ++ = cols[l];
5313           }
5314           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5315         }
5316       }
5317       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5318     }
5319 
5320     /* recvs and sends of j-array are completed */
5321     i = nrecvs;
5322     while (i--) {
5323       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5324     }
5325     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5326   } else if (scall == MAT_REUSE_MATRIX){
5327     sstartsj = *startsj_s;
5328     rstartsj = *startsj_r;
5329     bufa     = *bufa_ptr;
5330     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5331     b_otha   = b_oth->a;
5332   } else {
5333     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5334   }
5335 
5336   /* a-array */
5337   /*---------*/
5338   /*  post receives of a-array */
5339   for (i=0; i<nrecvs; i++){
5340     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5341     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5342   }
5343 
5344   /* pack the outgoing message a-array */
5345   k = 0;
5346   for (i=0; i<nsends; i++){
5347     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5348     bufA = bufa+sstartsj[i];
5349     for (j=0; j<nrows; j++) {
5350       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5351       for (ll=0; ll<sbs; ll++){
5352         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5353         for (l=0; l<ncols; l++){
5354           *bufA++ = vals[l];
5355         }
5356         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5357       }
5358     }
5359     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5360   }
5361   /* recvs and sends of a-array are completed */
5362   i = nrecvs;
5363   while (i--) {
5364     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5365   }
5366   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5367   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5368 
5369   if (scall == MAT_INITIAL_MATRIX){
5370     /* put together the new matrix */
5371     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5372 
5373     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5374     /* Since these are PETSc arrays, change flags to free them as necessary. */
5375     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5376     b_oth->free_a  = PETSC_TRUE;
5377     b_oth->free_ij = PETSC_TRUE;
5378     b_oth->nonew   = 0;
5379 
5380     ierr = PetscFree(bufj);CHKERRQ(ierr);
5381     if (!startsj_s || !bufa_ptr){
5382       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5383       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5384     } else {
5385       *startsj_s = sstartsj;
5386       *startsj_r = rstartsj;
5387       *bufa_ptr  = bufa;
5388     }
5389   }
5390   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5391   PetscFunctionReturn(0);
5392 }
5393 
5394 #undef __FUNCT__
5395 #define __FUNCT__ "MatGetCommunicationStructs"
5396 /*@C
5397   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5398 
5399   Not Collective
5400 
5401   Input Parameters:
5402 . A - The matrix in mpiaij format
5403 
5404   Output Parameter:
5405 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5406 . colmap - A map from global column index to local index into lvec
5407 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5408 
5409   Level: developer
5410 
5411 @*/
5412 #if defined (PETSC_USE_CTABLE)
5413 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5414 #else
5415 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5416 #endif
5417 {
5418   Mat_MPIAIJ *a;
5419 
5420   PetscFunctionBegin;
5421   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5422   PetscValidPointer(lvec, 2);
5423   PetscValidPointer(colmap, 3);
5424   PetscValidPointer(multScatter, 4);
5425   a = (Mat_MPIAIJ *) A->data;
5426   if (lvec) *lvec = a->lvec;
5427   if (colmap) *colmap = a->colmap;
5428   if (multScatter) *multScatter = a->Mvctx;
5429   PetscFunctionReturn(0);
5430 }
5431 
5432 EXTERN_C_BEGIN
5433 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5434 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5435 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5436 EXTERN_C_END
5437 
5438 #undef __FUNCT__
5439 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5440 /*
5441     Computes (B'*A')' since computing B*A directly is untenable
5442 
5443                n                       p                          p
5444         (              )       (              )         (                  )
5445       m (      A       )  *  n (       B      )   =   m (         C        )
5446         (              )       (              )         (                  )
5447 
5448 */
5449 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5450 {
5451   PetscErrorCode     ierr;
5452   Mat                At,Bt,Ct;
5453 
5454   PetscFunctionBegin;
5455   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5456   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5457   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5458   ierr = MatDestroy(&At);CHKERRQ(ierr);
5459   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5460   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5461   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5462   PetscFunctionReturn(0);
5463 }
5464 
5465 #undef __FUNCT__
5466 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5467 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5468 {
5469   PetscErrorCode ierr;
5470   PetscInt       m=A->rmap->n,n=B->cmap->n;
5471   Mat            Cmat;
5472 
5473   PetscFunctionBegin;
5474   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5475   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5476   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5477   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5478   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5479   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5480   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5481   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5482   *C   = Cmat;
5483   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5484   PetscFunctionReturn(0);
5485 }
5486 
5487 /* ----------------------------------------------------------------*/
5488 #undef __FUNCT__
5489 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5490 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5491 {
5492   PetscErrorCode ierr;
5493 
5494   PetscFunctionBegin;
5495   if (scall == MAT_INITIAL_MATRIX){
5496     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5497   }
5498   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5499   PetscFunctionReturn(0);
5500 }
5501 
5502 EXTERN_C_BEGIN
5503 #if defined(PETSC_HAVE_MUMPS)
5504 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5505 #endif
5506 #if defined(PETSC_HAVE_PASTIX)
5507 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5508 #endif
5509 #if defined(PETSC_HAVE_SUPERLU_DIST)
5510 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5511 #endif
5512 #if defined(PETSC_HAVE_SPOOLES)
5513 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5514 #endif
5515 EXTERN_C_END
5516 
5517 /*MC
5518    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5519 
5520    Options Database Keys:
5521 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5522 
5523   Level: beginner
5524 
5525 .seealso: MatCreateAIJ()
5526 M*/
5527 
5528 EXTERN_C_BEGIN
5529 #undef __FUNCT__
5530 #define __FUNCT__ "MatCreate_MPIAIJ"
5531 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5532 {
5533   Mat_MPIAIJ     *b;
5534   PetscErrorCode ierr;
5535   PetscMPIInt    size;
5536 
5537   PetscFunctionBegin;
5538   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5539 
5540   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5541   B->data         = (void*)b;
5542   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5543   B->assembled    = PETSC_FALSE;
5544 
5545   B->insertmode   = NOT_SET_VALUES;
5546   b->size         = size;
5547   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5548 
5549   /* build cache for off array entries formed */
5550   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5551   b->donotstash  = PETSC_FALSE;
5552   b->colmap      = 0;
5553   b->garray      = 0;
5554   b->roworiented = PETSC_TRUE;
5555 
5556   /* stuff used for matrix vector multiply */
5557   b->lvec      = PETSC_NULL;
5558   b->Mvctx     = PETSC_NULL;
5559 
5560   /* stuff for MatGetRow() */
5561   b->rowindices   = 0;
5562   b->rowvalues    = 0;
5563   b->getrowactive = PETSC_FALSE;
5564 
5565 #if defined(PETSC_HAVE_SPOOLES)
5566   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5567                                      "MatGetFactor_mpiaij_spooles",
5568                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5569 #endif
5570 #if defined(PETSC_HAVE_MUMPS)
5571   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5572                                      "MatGetFactor_aij_mumps",
5573                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5574 #endif
5575 #if defined(PETSC_HAVE_PASTIX)
5576   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5577 					   "MatGetFactor_mpiaij_pastix",
5578 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5579 #endif
5580 #if defined(PETSC_HAVE_SUPERLU_DIST)
5581   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5582                                      "MatGetFactor_mpiaij_superlu_dist",
5583                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5584 #endif
5585   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5586                                      "MatStoreValues_MPIAIJ",
5587                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5588   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5589                                      "MatRetrieveValues_MPIAIJ",
5590                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5591   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5592 				     "MatGetDiagonalBlock_MPIAIJ",
5593                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5594   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5595 				     "MatIsTranspose_MPIAIJ",
5596 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5597   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5598 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5599 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5600   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5601 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5602 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5603   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5604 				     "MatDiagonalScaleLocal_MPIAIJ",
5605 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5606   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5607                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5608                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5609   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5610                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5611                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5613                                      "MatConvert_MPIAIJ_MPISBAIJ",
5614                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5615   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5616                                      "MatMatMult_MPIDense_MPIAIJ",
5617                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5618   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5619                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5620                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5621   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5622                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5623                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5624   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5625   PetscFunctionReturn(0);
5626 }
5627 EXTERN_C_END
5628 
5629 #undef __FUNCT__
5630 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5631 /*@
5632      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5633          and "off-diagonal" part of the matrix in CSR format.
5634 
5635    Collective on MPI_Comm
5636 
5637    Input Parameters:
5638 +  comm - MPI communicator
5639 .  m - number of local rows (Cannot be PETSC_DECIDE)
5640 .  n - This value should be the same as the local size used in creating the
5641        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5642        calculated if N is given) For square matrices n is almost always m.
5643 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5644 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5645 .   i - row indices for "diagonal" portion of matrix
5646 .   j - column indices
5647 .   a - matrix values
5648 .   oi - row indices for "off-diagonal" portion of matrix
5649 .   oj - column indices
5650 -   oa - matrix values
5651 
5652    Output Parameter:
5653 .   mat - the matrix
5654 
5655    Level: advanced
5656 
5657    Notes:
5658        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5659        must free the arrays once the matrix has been destroyed and not before.
5660 
5661        The i and j indices are 0 based
5662 
5663        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5664 
5665        This sets local rows and cannot be used to set off-processor values.
5666 
5667        You cannot later use MatSetValues() to change values in this matrix.
5668 
5669 .keywords: matrix, aij, compressed row, sparse, parallel
5670 
5671 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5672           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5673 @*/
5674 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5675 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5676 {
5677   PetscErrorCode ierr;
5678   Mat_MPIAIJ     *maij;
5679 
5680  PetscFunctionBegin;
5681   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5682   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5683   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5684   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5685   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5686   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5687   maij = (Mat_MPIAIJ*) (*mat)->data;
5688   maij->donotstash     = PETSC_TRUE;
5689   (*mat)->preallocated = PETSC_TRUE;
5690 
5691   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5692   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5693 
5694   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5695   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5696 
5697   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5698   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5699   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5700   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5701 
5702   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5703   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5704   PetscFunctionReturn(0);
5705 }
5706 
5707 /*
5708     Special version for direct calls from Fortran
5709 */
5710 #include <petsc-private/fortranimpl.h>
5711 
5712 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5713 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5714 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5715 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5716 #endif
5717 
5718 /* Change these macros so can be used in void function */
5719 #undef CHKERRQ
5720 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5721 #undef SETERRQ2
5722 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5723 #undef SETERRQ3
5724 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5725 #undef SETERRQ
5726 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5727 
5728 EXTERN_C_BEGIN
5729 #undef __FUNCT__
5730 #define __FUNCT__ "matsetvaluesmpiaij_"
5731 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5732 {
5733   Mat             mat = *mmat;
5734   PetscInt        m = *mm, n = *mn;
5735   InsertMode      addv = *maddv;
5736   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5737   PetscScalar     value;
5738   PetscErrorCode  ierr;
5739 
5740   MatCheckPreallocated(mat,1);
5741   if (mat->insertmode == NOT_SET_VALUES) {
5742     mat->insertmode = addv;
5743   }
5744 #if defined(PETSC_USE_DEBUG)
5745   else if (mat->insertmode != addv) {
5746     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5747   }
5748 #endif
5749   {
5750   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5751   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5752   PetscBool       roworiented = aij->roworiented;
5753 
5754   /* Some Variables required in the macro */
5755   Mat             A = aij->A;
5756   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5757   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5758   MatScalar       *aa = a->a;
5759   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5760   Mat             B = aij->B;
5761   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5762   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5763   MatScalar       *ba = b->a;
5764 
5765   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5766   PetscInt        nonew = a->nonew;
5767   MatScalar       *ap1,*ap2;
5768 
5769   PetscFunctionBegin;
5770   for (i=0; i<m; i++) {
5771     if (im[i] < 0) continue;
5772 #if defined(PETSC_USE_DEBUG)
5773     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5774 #endif
5775     if (im[i] >= rstart && im[i] < rend) {
5776       row      = im[i] - rstart;
5777       lastcol1 = -1;
5778       rp1      = aj + ai[row];
5779       ap1      = aa + ai[row];
5780       rmax1    = aimax[row];
5781       nrow1    = ailen[row];
5782       low1     = 0;
5783       high1    = nrow1;
5784       lastcol2 = -1;
5785       rp2      = bj + bi[row];
5786       ap2      = ba + bi[row];
5787       rmax2    = bimax[row];
5788       nrow2    = bilen[row];
5789       low2     = 0;
5790       high2    = nrow2;
5791 
5792       for (j=0; j<n; j++) {
5793         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5794         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5795         if (in[j] >= cstart && in[j] < cend){
5796           col = in[j] - cstart;
5797           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5798         } else if (in[j] < 0) continue;
5799 #if defined(PETSC_USE_DEBUG)
5800         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5801 #endif
5802         else {
5803           if (mat->was_assembled) {
5804             if (!aij->colmap) {
5805               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5806             }
5807 #if defined (PETSC_USE_CTABLE)
5808             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5809 	    col--;
5810 #else
5811             col = aij->colmap[in[j]] - 1;
5812 #endif
5813             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5814               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5815               col =  in[j];
5816               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5817               B = aij->B;
5818               b = (Mat_SeqAIJ*)B->data;
5819               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5820               rp2      = bj + bi[row];
5821               ap2      = ba + bi[row];
5822               rmax2    = bimax[row];
5823               nrow2    = bilen[row];
5824               low2     = 0;
5825               high2    = nrow2;
5826               bm       = aij->B->rmap->n;
5827               ba = b->a;
5828             }
5829           } else col = in[j];
5830           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5831         }
5832       }
5833     } else {
5834       if (!aij->donotstash) {
5835         if (roworiented) {
5836           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5837         } else {
5838           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5839         }
5840       }
5841     }
5842   }}
5843   PetscFunctionReturnVoid();
5844 }
5845 EXTERN_C_END
5846 
5847