xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ea5d4fccf296dd2bbd0f9c3a3343651cb1066da7)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL. The AIJ format also automatically switches over to using
18    inodes when enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
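/*
   A minimal usage sketch (not part of the PETSc source; names such as comm, mlocal, and the
   preallocation counts are illustrative, and error checking is omitted).  It shows why the
   documentation above recommends calling both preallocation routines: the same code then works
   whether the communicator has one process or many.

     Mat C;
     MatCreate(comm,&C);
     MatSetSizes(C,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(C,MATAIJ);                                    // becomes MATSEQAIJ or MATMPIAIJ
     MatSeqAIJSetPreallocation(C,5,PETSC_NULL);               // used on a one-process communicator
     MatMPIAIJSetPreallocation(C,5,PETSC_NULL,2,PETSC_NULL);  // used on a parallel communicator
     // ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual ...
*/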
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
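/*
   The same pattern applies when the type is chosen at run time; a hedged sketch (object names are
   illustrative only) that picks up -mat_type aijcrl from the options database:

     MatCreate(comm,&C);
     MatSetSizes(C,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetFromOptions(C);                                    // honors -mat_type aijcrl
     MatSeqAIJSetPreallocation(C,5,PETSC_NULL);
     MatMPIAIJSetPreallocation(C,5,PETSC_NULL,2,PETSC_NULL);
*/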
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
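  /* Each process accumulates its contributions into a work array over all N global columns:
     the diagonal block is indexed by A->cmap->rstart + local column, the off-diagonal block
     through garray[].  An Allreduce (MAX for NORM_INFINITY, SUM otherwise) combines the
     per-process arrays, and NORM_2 takes a square root at the end. */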
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
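/*
   Note: the ld[] array built below records, for each local row, how many off-diagonal entries lie
   to the left of the diagonal block.  It is stashed in Mat_MPIAIJ->ld so that the MAT_REUSE_MATRIX
   branch can split the stream of numerical values received from process 0 back into the diagonal
   (A) and off-diagonal (B) parts without re-sending the column indices.
*/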
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine the number of diagonal and off-diagonal nonzeros in each row */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine the number of diagonal and off-diagonal nonzeros in each row */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0*/
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash table cost; without it, it is not scalable (each processor
330 has an order N integer array) but it is fast to access.
331 */
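/*
   For example (values illustrative only): if the off-diagonal part references global columns
   garray[] = {3,17,42}, colmap maps 3->1, 17->2, 42->3.  The stored indices are 1-based so that
   0 can mean "column not present"; callers such as MatSetValues_MPIAIJ() subtract 1 after lookup.
*/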
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
354 
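/*
   The two macros below insert a single value into the diagonal (A) or off-diagonal (B) SeqAIJ
   block: a short binary search narrows the search range within the row, a linear scan locates the
   column, an existing entry is added to or overwritten, and otherwise the row is grown (possibly
   reallocating via MatSeqXAIJReallocateAIJ()) with later entries shifted up to make room.
*/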
355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356 { \
357     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358     lastcol1 = col;\
359     while (high1-low1 > 5) { \
360       t = (low1+high1)/2; \
361       if (rp1[t] > col) high1 = t; \
362       else             low1  = t; \
363     } \
364       for (_i=low1; _i<high1; _i++) { \
365         if (rp1[_i] > col) break; \
366         if (rp1[_i] == col) { \
367           if (addv == ADD_VALUES) ap1[_i] += value;   \
368           else                    ap1[_i] = value; \
369           goto a_noinsert; \
370         } \
371       }  \
372       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
374       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376       N = nrow1++ - 1; a->nz++; high1++; \
377       /* shift up all the later entries in this row */ \
378       for (ii=N; ii>=_i; ii--) { \
379         rp1[ii+1] = rp1[ii]; \
380         ap1[ii+1] = ap1[ii]; \
381       } \
382       rp1[_i] = col;  \
383       ap1[_i] = value;  \
384       a_noinsert: ; \
385       ailen[row] = nrow1; \
386 }
387 
388 
389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390 { \
391     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392     lastcol2 = col;\
393     while (high2-low2 > 5) { \
394       t = (low2+high2)/2; \
395       if (rp2[t] > col) high2 = t; \
396       else             low2  = t; \
397     } \
398     for (_i=low2; _i<high2; _i++) {		\
399       if (rp2[_i] > col) break;			\
400       if (rp2[_i] == col) {			      \
401 	if (addv == ADD_VALUES) ap2[_i] += value;     \
402 	else                    ap2[_i] = value;      \
403 	goto b_noinsert;			      \
404       }						      \
405     }							      \
406     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
408     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410     N = nrow2++ - 1; b->nz++; high2++;					\
411     /* shift up all the later entries in this row */			\
412     for (ii=N; ii>=_i; ii--) {						\
413       rp2[ii+1] = rp2[ii];						\
414       ap2[ii+1] = ap2[ii];						\
415     }									\
416     rp2[_i] = col;							\
417     ap2[_i] = value;							\
418     b_noinsert: ;								\
419     bilen[row] = nrow2;							\
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
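  /* v is assumed to hold the entire local row ordered by global column index: entries to the
     left of the diagonal block go into B, the diagonal-block entries into A, and the remaining
     entries to the right of the diagonal block back into B */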
433 
434   /* find size of row to the left of the diagonal part */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValues_MPIAIJ"
452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscScalar    value;
456   PetscErrorCode ierr;
457   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459   PetscBool      roworiented = aij->roworiented;
460 
461   /* Some Variables required in the macro */
462   Mat            A = aij->A;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
464   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465   MatScalar      *aa = a->a;
466   PetscBool      ignorezeroentries = a->ignorezeroentries;
467   Mat            B = aij->B;
468   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
469   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470   MatScalar      *ba = b->a;
471 
472   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473   PetscInt       nonew;
474   MatScalar      *ap1,*ap2;
475 
476   PetscFunctionBegin;
477   if (v) PetscValidScalarPointer(v,6);
478   for (i=0; i<m; i++) {
479     if (im[i] < 0) continue;
480 #if defined(PETSC_USE_DEBUG)
481     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482 #endif
483     if (im[i] >= rstart && im[i] < rend) {
484       row      = im[i] - rstart;
485       lastcol1 = -1;
486       rp1      = aj + ai[row];
487       ap1      = aa + ai[row];
488       rmax1    = aimax[row];
489       nrow1    = ailen[row];
490       low1     = 0;
491       high1    = nrow1;
492       lastcol2 = -1;
493       rp2      = bj + bi[row];
494       ap2      = ba + bi[row];
495       rmax2    = bimax[row];
496       nrow2    = bilen[row];
497       low2     = 0;
498       high2    = nrow2;
499 
500       for (j=0; j<n; j++) {
501         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503         if (in[j] >= cstart && in[j] < cend){
504           col = in[j] - cstart;
505           nonew = a->nonew;
506           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
507         } else if (in[j] < 0) continue;
508 #if defined(PETSC_USE_DEBUG)
509         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
510 #endif
511         else {
512           if (mat->was_assembled) {
513             if (!aij->colmap) {
514               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
515             }
516 #if defined (PETSC_USE_CTABLE)
517             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
518 	    col--;
519 #else
520             col = aij->colmap[in[j]] - 1;
521 #endif
522             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
523               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
524               col =  in[j];
525               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
526               B = aij->B;
527               b = (Mat_SeqAIJ*)B->data;
528               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
529               rp2      = bj + bi[row];
530               ap2      = ba + bi[row];
531               rmax2    = bimax[row];
532               nrow2    = bilen[row];
533               low2     = 0;
534               high2    = nrow2;
535               bm       = aij->B->rmap->n;
536               ba = b->a;
537             }
538           } else col = in[j];
539           nonew = b->nonew;
540           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
541         }
542       }
543     } else {
544       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
545       if (!aij->donotstash) {
546         mat->assembled = PETSC_FALSE;
547         if (roworiented) {
548           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
549         } else {
550           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
551         }
552       }
553     }
554   }
555   PetscFunctionReturn(0);
556 }
557 
558 #undef __FUNCT__
559 #define __FUNCT__ "MatGetValues_MPIAIJ"
560 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
561 {
562   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
563   PetscErrorCode ierr;
564   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
565   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
566 
567   PetscFunctionBegin;
568   for (i=0; i<m; i++) {
569     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
570     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
571     if (idxm[i] >= rstart && idxm[i] < rend) {
572       row = idxm[i] - rstart;
573       for (j=0; j<n; j++) {
574         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
575         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
576         if (idxn[j] >= cstart && idxn[j] < cend){
577           col = idxn[j] - cstart;
578           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
579         } else {
580           if (!aij->colmap) {
581             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
582           }
583 #if defined (PETSC_USE_CTABLE)
584           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
585           col --;
586 #else
587           col = aij->colmap[idxn[j]] - 1;
588 #endif
589           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
590           else {
591             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
592           }
593         }
594       }
595     } else {
596       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
597     }
598   }
599   PetscFunctionReturn(0);
600 }
601 
602 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
603 
604 #undef __FUNCT__
605 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
606 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
607 {
608   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
609   PetscErrorCode ierr;
610   PetscInt       nstash,reallocs;
611   InsertMode     addv;
612 
613   PetscFunctionBegin;
614   if (aij->donotstash || mat->nooffprocentries) {
615     PetscFunctionReturn(0);
616   }
617 
618   /* make sure all processors are either in INSERTMODE or ADDMODE */
619   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
620   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
621   mat->insertmode = addv; /* in case this processor had no cache */
622 
623   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
624   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
625   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
626   PetscFunctionReturn(0);
627 }
628 
629 #undef __FUNCT__
630 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
631 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
632 {
633   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
634   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
635   PetscErrorCode ierr;
636   PetscMPIInt    n;
637   PetscInt       i,j,rstart,ncols,flg;
638   PetscInt       *row,*col;
639   PetscBool      other_disassembled;
640   PetscScalar    *val;
641   InsertMode     addv = mat->insertmode;
642 
643   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
644   PetscFunctionBegin;
645   if (!aij->donotstash && !mat->nooffprocentries) {
646     while (1) {
647       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
648       if (!flg) break;
649 
650       for (i=0; i<n;) {
651         /* Now identify the consecutive vals belonging to the same row */
652         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
653         if (j < n) ncols = j-i;
654         else       ncols = n-i;
655         /* Now assemble all these values with a single function call */
656         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
657         i = j;
658       }
659     }
660     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
661   }
662   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
663   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
664 
665   /* determine if any processor has disassembled, if so we must
666      also disassemble ourselves, in order that we may reassemble. */
667   /*
668      if nonzero structure of submatrix B cannot change then we know that
669      no processor disassembled thus we can skip this stuff
670   */
671   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
672     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
673     if (mat->was_assembled && !other_disassembled) {
674       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
675     }
676   }
677   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
678     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
679   }
680   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
681   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
682   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
683   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
684 
685   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
686   aij->rowvalues = 0;
687 
688   /* used by MatAXPY() */
689   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
690   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
691 
692   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
693   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
694   PetscFunctionReturn(0);
695 }
696 
697 #undef __FUNCT__
698 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
699 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
700 {
701   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
702   PetscErrorCode ierr;
703 
704   PetscFunctionBegin;
705   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
706   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatZeroRows_MPIAIJ"
712 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
713 {
714   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
715   PetscErrorCode    ierr;
716   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
717   PetscInt          i,*owners = A->rmap->range;
718   PetscInt          *nprocs,j,idx,nsends,row;
719   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
720   PetscInt          *rvalues,count,base,slen,*source;
721   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
722   MPI_Comm          comm = ((PetscObject)A)->comm;
723   MPI_Request       *send_waits,*recv_waits;
724   MPI_Status        recv_status,*send_status;
725   const PetscScalar *xx;
726   PetscScalar       *bb;
727 #if defined(PETSC_DEBUG)
728   PetscBool      found = PETSC_FALSE;
729 #endif
730 
731   PetscFunctionBegin;
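  /* rows[] may name rows owned by other processes: each process first determines the owner of
     every index it was given, ships those indices to their owners with nonblocking sends and
     receives, and then zeros only the rows it owns (gathered into lrows[] below) */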
732   /*  first count number of contributors to each processor */
733   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
734   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
735   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
736   j = 0;
737   for (i=0; i<N; i++) {
738     if (lastidx > (idx = rows[i])) j = 0;
739     lastidx = idx;
740     for (; j<size; j++) {
741       if (idx >= owners[j] && idx < owners[j+1]) {
742         nprocs[2*j]++;
743         nprocs[2*j+1] = 1;
744         owner[i] = j;
745 #if defined(PETSC_DEBUG)
746         found = PETSC_TRUE;
747 #endif
748         break;
749       }
750     }
751 #if defined(PETSC_DEBUG)
752     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
753     found = PETSC_FALSE;
754 #endif
755   }
756   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
757 
758   if (A->nooffproczerorows) {
759     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
760     nrecvs = nsends;
761     nmax   = N;
762   } else {
763     /* inform other processors of number of messages and max length*/
764     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
765   }
766 
767   /* post receives:   */
768   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
769   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
770   for (i=0; i<nrecvs; i++) {
771     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
772   }
773 
774   /* do sends:
775       1) starts[i] gives the starting index in svalues for stuff going to
776          the ith processor
777   */
778   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
779   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
780   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
781   starts[0] = 0;
782   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
783   for (i=0; i<N; i++) {
784     svalues[starts[owner[i]]++] = rows[i];
785   }
786 
787   starts[0] = 0;
788   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
789   count = 0;
790   for (i=0; i<size; i++) {
791     if (nprocs[2*i+1]) {
792       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
793     }
794   }
795   ierr = PetscFree(starts);CHKERRQ(ierr);
796 
797   base = owners[rank];
798 
799   /*  wait on receives */
800   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
801   count  = nrecvs; slen = 0;
802   while (count) {
803     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
804     /* unpack receives into our local space */
805     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
806     source[imdex]  = recv_status.MPI_SOURCE;
807     lens[imdex]    = n;
808     slen          += n;
809     count--;
810   }
811   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
812 
813   /* move the data into the send scatter */
814   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
815   count = 0;
816   for (i=0; i<nrecvs; i++) {
817     values = rvalues + i*nmax;
818     for (j=0; j<lens[i]; j++) {
819       lrows[count++] = values[j] - base;
820     }
821   }
822   ierr = PetscFree(rvalues);CHKERRQ(ierr);
823   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
824   ierr = PetscFree(owner);CHKERRQ(ierr);
825   ierr = PetscFree(nprocs);CHKERRQ(ierr);
826 
827   /* fix right hand side if needed */
828   if (x && b) {
829     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
830     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
831     for (i=0; i<slen; i++) {
832       bb[lrows[i]] = diag*xx[lrows[i]];
833     }
834     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
835     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
836   }
837   /*
838         Zero the required rows. If the "diagonal block" of the matrix
839      is square and the user wishes to set the diagonal we use separate
840      code so that MatSetValues() is not called for each diagonal entry, which would allocate
841      new memory, triggering lots of mallocs and slowing things down.
842 
843   */
844   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
845   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
846   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
847     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
848   } else if (diag != 0.0) {
849     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
850     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
851       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
852 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
853     }
854     for (i = 0; i < slen; i++) {
855       row  = lrows[i] + rstart;
856       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
857     }
858     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
859     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
860   } else {
861     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
862   }
863   ierr = PetscFree(lrows);CHKERRQ(ierr);
864 
865   /* wait on sends */
866   if (nsends) {
867     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
868     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
869     ierr = PetscFree(send_status);CHKERRQ(ierr);
870   }
871   ierr = PetscFree(send_waits);CHKERRQ(ierr);
872   ierr = PetscFree(svalues);CHKERRQ(ierr);
873   PetscFunctionReturn(0);
874 }
875 
876 #undef __FUNCT__
877 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
878 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
879 {
880   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
881   PetscErrorCode    ierr;
882   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
883   PetscInt          i,*owners = A->rmap->range;
884   PetscInt          *nprocs,j,idx,nsends;
885   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
886   PetscInt          *rvalues,count,base,slen,*source;
887   PetscInt          *lens,*lrows,*values,m;
888   MPI_Comm          comm = ((PetscObject)A)->comm;
889   MPI_Request       *send_waits,*recv_waits;
890   MPI_Status        recv_status,*send_status;
891   const PetscScalar *xx;
892   PetscScalar       *bb,*mask;
893   Vec               xmask,lmask;
894   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
895   const PetscInt    *aj, *ii,*ridx;
896   PetscScalar       *aa;
897 #if defined(PETSC_DEBUG)
898   PetscBool         found = PETSC_FALSE;
899 #endif
900 
901   PetscFunctionBegin;
902   /*  first count number of contributors to each processor */
903   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
904   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
905   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
906   j = 0;
907   for (i=0; i<N; i++) {
908     if (lastidx > (idx = rows[i])) j = 0;
909     lastidx = idx;
910     for (; j<size; j++) {
911       if (idx >= owners[j] && idx < owners[j+1]) {
912         nprocs[2*j]++;
913         nprocs[2*j+1] = 1;
914         owner[i] = j;
915 #if defined(PETSC_DEBUG)
916         found = PETSC_TRUE;
917 #endif
918         break;
919       }
920     }
921 #if defined(PETSC_DEBUG)
922     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
923     found = PETSC_FALSE;
924 #endif
925   }
926   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
927 
928   /* inform other processors of number of messages and max length*/
929   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
930 
931   /* post receives:   */
932   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
933   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
934   for (i=0; i<nrecvs; i++) {
935     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
936   }
937 
938   /* do sends:
939       1) starts[i] gives the starting index in svalues for stuff going to
940          the ith processor
941   */
942   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
943   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
944   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
945   starts[0] = 0;
946   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
947   for (i=0; i<N; i++) {
948     svalues[starts[owner[i]]++] = rows[i];
949   }
950 
951   starts[0] = 0;
952   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
953   count = 0;
954   for (i=0; i<size; i++) {
955     if (nprocs[2*i+1]) {
956       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
957     }
958   }
959   ierr = PetscFree(starts);CHKERRQ(ierr);
960 
961   base = owners[rank];
962 
963   /*  wait on receives */
964   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
965   count  = nrecvs; slen = 0;
966   while (count) {
967     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
968     /* unpack receives into our local space */
969     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
970     source[imdex]  = recv_status.MPI_SOURCE;
971     lens[imdex]    = n;
972     slen          += n;
973     count--;
974   }
975   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
976 
977   /* move the data into the send scatter */
978   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
979   count = 0;
980   for (i=0; i<nrecvs; i++) {
981     values = rvalues + i*nmax;
982     for (j=0; j<lens[i]; j++) {
983       lrows[count++] = values[j] - base;
984     }
985   }
986   ierr = PetscFree(rvalues);CHKERRQ(ierr);
987   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
988   ierr = PetscFree(owner);CHKERRQ(ierr);
989   ierr = PetscFree(nprocs);CHKERRQ(ierr);
990   /* lrows are the local rows to be zeroed, slen is the number of local rows */
991 
992   /* zero diagonal part of matrix */
993   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
994 
995   /* handle off diagonal part of matrix */
996   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
997   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
998   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
999   for (i=0; i<slen; i++) {
1000     bb[lrows[i]] = 1;
1001   }
1002   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1003   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1006   if (x) {
1007     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1008     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1009     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1010     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1011   }
1012   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1013 
1014   /* remove zeroed rows of off diagonal matrix */
1015   ii = aij->i;
1016   for (i=0; i<slen; i++) {
1017     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1018   }
1019 
1020   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1021   if (aij->compressedrow.use){
1022     m    = aij->compressedrow.nrows;
1023     ii   = aij->compressedrow.i;
1024     ridx = aij->compressedrow.rindex;
1025     for (i=0; i<m; i++){
1026       n   = ii[i+1] - ii[i];
1027       aj  = aij->j + ii[i];
1028       aa  = aij->a + ii[i];
1029 
1030       for (j=0; j<n; j++) {
1031         if (PetscAbsScalar(mask[*aj])) {
1032           if (b) bb[*ridx] -= *aa*xx[*aj];
1033           *aa        = 0.0;
1034         }
1035         aa++;
1036         aj++;
1037       }
1038       ridx++;
1039     }
1040   } else { /* do not use compressed row format */
1041     m = l->B->rmap->n;
1042     for (i=0; i<m; i++) {
1043       n   = ii[i+1] - ii[i];
1044       aj  = aij->j + ii[i];
1045       aa  = aij->a + ii[i];
1046       for (j=0; j<n; j++) {
1047         if (PetscAbsScalar(mask[*aj])) {
1048           if (b) bb[i] -= *aa*xx[*aj];
1049           *aa    = 0.0;
1050         }
1051         aa++;
1052         aj++;
1053       }
1054     }
1055   }
1056   if (x) {
1057     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1058     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1059   }
1060   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1061   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1062   ierr = PetscFree(lrows);CHKERRQ(ierr);
1063 
1064   /* wait on sends */
1065   if (nsends) {
1066     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1067     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1068     ierr = PetscFree(send_status);CHKERRQ(ierr);
1069   }
1070   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1071   ierr = PetscFree(svalues);CHKERRQ(ierr);
1072 
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 #undef __FUNCT__
1077 #define __FUNCT__ "MatMult_MPIAIJ"
1078 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1079 {
1080   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1081   PetscErrorCode ierr;
1082   PetscInt       nt;
1083 
1084   PetscFunctionBegin;
1085   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1086   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
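  /* overlap communication with computation: start scattering the ghost values of xx into a->lvec,
     multiply by the diagonal block while the scatter is in flight, then complete the scatter and
     apply the off-diagonal block with a multiply-add into yy */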
1087   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1088   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1089   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 #undef __FUNCT__
1095 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1096 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1097 {
1098   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1108 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1115   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1116   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1123 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1124 {
1125   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1126   PetscErrorCode ierr;
1127   PetscBool      merged;
1128 
1129   PetscFunctionBegin;
1130   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1131   /* do nondiagonal part */
1132   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1133   if (!merged) {
1134     /* send it on its way */
1135     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1136     /* do local part */
1137     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1138     /* receive remote parts: note this assumes the values are not actually */
1139     /* added into yy until the next line */
1140     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1141   } else {
1142     /* do local part */
1143     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1144     /* send it on its way */
1145     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1146     /* values actually were received in the Begin() but we need to call this nop */
1147     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148   }
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 EXTERN_C_BEGIN
1153 #undef __FUNCT__
1154 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1155 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1156 {
1157   MPI_Comm       comm;
1158   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1159   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1160   IS             Me,Notme;
1161   PetscErrorCode ierr;
1162   PetscInt       M,N,first,last,*notme,i;
1163   PetscMPIInt    size;
1164 
1165   PetscFunctionBegin;
1166 
1167   /* Easy test: symmetric diagonal block */
1168   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1169   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1170   if (!*f) PetscFunctionReturn(0);
1171   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1172   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1173   if (size == 1) PetscFunctionReturn(0);
1174 
1175   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1176   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1177   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1178   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1179   for (i=0; i<first; i++) notme[i] = i;
1180   for (i=last; i<M; i++) notme[i-last+first] = i;
1181   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1182   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1183   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1184   Aoff = Aoffs[0];
1185   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1186   Boff = Boffs[0];
1187   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1188   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1189   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1190   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1191   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1192   ierr = PetscFree(notme);CHKERRQ(ierr);
1193    PetscFunctionReturn(0);
1194 }
1195 EXTERN_C_END
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1199 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1200 {
1201   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205   /* do nondiagonal part */
1206   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1207   /* send it on its way */
1208   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1209   /* do local part */
1210   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1211   /* receive remote parts */
1212   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 /*
1217   This only works correctly for square matrices where the subblock A->A is the
1218    diagonal block
1219 */
1220 #undef __FUNCT__
1221 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1222 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1223 {
1224   PetscErrorCode ierr;
1225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1226 
1227   PetscFunctionBegin;
1228   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1229   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1230   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "MatScale_MPIAIJ"
1236 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1237 {
1238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1239   PetscErrorCode ierr;
1240 
1241   PetscFunctionBegin;
1242   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1243   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1244   PetscFunctionReturn(0);
1245 }
1246 
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatDestroy_MPIAIJ"
1249 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1250 {
1251   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1252   PetscErrorCode ierr;
1253 
1254   PetscFunctionBegin;
1255 #if defined(PETSC_USE_LOG)
1256   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1257 #endif
1258   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1259   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1260   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1261   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1262 #if defined (PETSC_USE_CTABLE)
1263   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1264 #else
1265   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1266 #endif
1267   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1268   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1269   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1270   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1271   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1272   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1273 
1274   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1288 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1289 {
1290   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1291   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1292   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1293   PetscErrorCode    ierr;
1294   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1295   int               fd;
1296   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1297   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1298   PetscScalar       *column_values;
1299   PetscInt          message_count,flowcontrolcount;
1300 
1301   PetscFunctionBegin;
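  /* Only process 0 writes to the file: it stores the global header, then its own row lengths,
     column indices, and values, and then receives and writes each other process's data in turn,
     with the PetscViewerFlowControl calls pacing how many messages are outstanding at once. */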
1302   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1303   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1304   nz   = A->nz + B->nz;
1305   if (!rank) {
1306     header[0] = MAT_FILE_CLASSID;
1307     header[1] = mat->rmap->N;
1308     header[2] = mat->cmap->N;
1309     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1310     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1311     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1312     /* get largest number of rows any processor has */
1313     rlen = mat->rmap->n;
1314     range = mat->rmap->range;
1315     for (i=1; i<size; i++) {
1316       rlen = PetscMax(rlen,range[i+1] - range[i]);
1317     }
1318   } else {
1319     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1320     rlen = mat->rmap->n;
1321   }
1322 
1323   /* load up the local row counts */
1324   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1325   for (i=0; i<mat->rmap->n; i++) {
1326     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1327   }
1328 
1329   /* store the row lengths to the file */
1330   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1331   if (!rank) {
1332     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1333     for (i=1; i<size; i++) {
1334       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1335       rlen = range[i+1] - range[i];
1336       ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1337       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1338     }
1339     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1340   } else {
1341     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1342     ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1343     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1344   }
1345   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1346 
1347   /* load up the local column indices */
1348   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
1349   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1350   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1351   cnt  = 0;
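  /* For each local row, emit the global column indices in ascending order: first the off-diagonal (B)
     entries whose global column precedes the diagonal block, then the diagonal-block (A) entries shifted
     by cstart, then the remaining off-diagonal entries; garray[] maps B's compressed local column
     numbers back to global column numbers. */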
1352   for (i=0; i<mat->rmap->n; i++) {
1353     for (j=B->i[i]; j<B->i[i+1]; j++) {
1354       if ( (col = garray[B->j[j]]) > cstart) break;
1355       column_indices[cnt++] = col;
1356     }
1357     for (k=A->i[i]; k<A->i[i+1]; k++) {
1358       column_indices[cnt++] = A->j[k] + cstart;
1359     }
1360     for (; j<B->i[i+1]; j++) {
1361       column_indices[cnt++] = garray[B->j[j]];
1362     }
1363   }
1364   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1365 
1366   /* store the column indices to the file */
1367   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1368   if (!rank) {
1369     MPI_Status status;
1370     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1371     for (i=1; i<size; i++) {
1372       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1373       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1374       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1375       ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1376       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1377     }
1378     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1379   } else {
1380     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1381     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1382     ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1383     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1384   }
1385   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1386 
1387   /* load up the local column values */
1388   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1389   cnt  = 0;
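  /* Pack the numerical values in exactly the same B-lower / A / B-upper order used for the column
     indices above, so entry k of column_values corresponds to entry k of column_indices. */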
1390   for (i=0; i<mat->rmap->n; i++) {
1391     for (j=B->i[i]; j<B->i[i+1]; j++) {
1392       if ( garray[B->j[j]] > cstart) break;
1393       column_values[cnt++] = B->a[j];
1394     }
1395     for (k=A->i[i]; k<A->i[i+1]; k++) {
1396       column_values[cnt++] = A->a[k];
1397     }
1398     for (; j<B->i[i+1]; j++) {
1399       column_values[cnt++] = B->a[j];
1400     }
1401   }
1402   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1403 
1404   /* store the column values to the file */
1405   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1406   if (!rank) {
1407     MPI_Status status;
1408     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1409     for (i=1; i<size; i++) {
1410       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1411       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1412       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1413       ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1414       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1415     }
1416     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1417   } else {
1418     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1419     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1420     ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1421     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1422   }
1423   ierr = PetscFree(column_values);CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1429 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1430 {
1431   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1432   PetscErrorCode    ierr;
1433   PetscMPIInt       rank = aij->rank,size = aij->size;
1434   PetscBool         isdraw,iascii,isbinary;
1435   PetscViewer       sviewer;
1436   PetscViewerFormat format;
1437 
1438   PetscFunctionBegin;
1439   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1440   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1441   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1442   if (iascii) {
1443     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1444     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1445       MatInfo    info;
1446       PetscBool  inodes;
1447 
1448       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1449       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1450       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1451       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1452       if (!inodes) {
1453         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1454                                                    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1455       } else {
1456         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1457                                                    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1458       }
1459       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1460       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1461       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1462       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1463       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1464       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1465       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1466       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1467       PetscFunctionReturn(0);
1468     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1469       PetscInt   inodecount,inodelimit,*inodes;
1470       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1471       if (inodes) {
1472         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1473       } else {
1474         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1475       }
1476       PetscFunctionReturn(0);
1477     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1478       PetscFunctionReturn(0);
1479     }
1480   } else if (isbinary) {
1481     if (size == 1) {
1482       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1483       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1484     } else {
1485       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1486     }
1487     PetscFunctionReturn(0);
1488   } else if (isdraw) {
1489     PetscDraw  draw;
1490     PetscBool  isnull;
1491     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1492     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1493   }
1494 
1495   if (size == 1) {
1496     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1497     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1498   } else {
1499     /* assemble the entire matrix onto first processor. */
1500     Mat         A;
1501     Mat_SeqAIJ  *Aloc;
1502     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1503     MatScalar   *a;
1504 
1505     if (mat->rmap->N > 1024) {
1506       PetscBool  flg = PETSC_FALSE;
1507 
1508       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1509       if (!flg) {
1510         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1511       }
1512     }
1513 
1514     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1515     if (!rank) {
1516       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1517     } else {
1518       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1519     }
1520     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1521     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1522     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1523     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1524     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1525 
1526     /* copy over the A part */
1527     Aloc = (Mat_SeqAIJ*)aij->A->data;
1528     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1529     row = mat->rmap->rstart;
1530     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1531     for (i=0; i<m; i++) {
1532       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1533       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1534     }
1535     aj = Aloc->j;
1536     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1537 
1538     /* copy over the B part */
1539     Aloc = (Mat_SeqAIJ*)aij->B->data;
1540     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1541     row  = mat->rmap->rstart;
1542     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1543     ct   = cols;
1544     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1545     for (i=0; i<m; i++) {
1546       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1547       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1548     }
1549     ierr = PetscFree(ct);CHKERRQ(ierr);
1550     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1551     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1552     /*
1553        Everyone has to call MatView() to draw the matrix since the graphics waits are
1554        synchronized across all processors that share the PetscDraw object
1555     */
1556     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1557     if (!rank) {
1558       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1559       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1560       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1561       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1562     }
1563     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1564     ierr = MatDestroy(&A);CHKERRQ(ierr);
1565   }
1566   PetscFunctionReturn(0);
1567 }
1568 
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatView_MPIAIJ"
1571 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1572 {
1573   PetscErrorCode ierr;
1574   PetscBool      iascii,isdraw,issocket,isbinary;
1575 
1576   PetscFunctionBegin;
1577   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1578   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1579   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1580   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1581   if (iascii || isdraw || isbinary || issocket) {
1582     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1583   } else {
1584     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1585   }
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatSOR_MPIAIJ"
1591 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1592 {
1593   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1594   PetscErrorCode ierr;
1595   Vec            bb1 = 0;
1596   PetscBool      hasop;
1597 
1598   PetscFunctionBegin;
1599   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1600     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1601   }
1602 
1603   if (flag == SOR_APPLY_UPPER) {
1604     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1605     PetscFunctionReturn(0);
1606   }
1607 
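  /* The SOR_LOCAL_* variants below are block Jacobi in structure: each outer iteration scatters the
     off-process entries of xx into mat->lvec, forms the corrected right-hand side bb1 = bb - B*lvec
     (implemented as lvec = -lvec followed by bb1 = B*lvec + bb), and then runs the requested sweep of
     the sequential SOR kernel on the local diagonal block A only. */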
1608   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1609     if (flag & SOR_ZERO_INITIAL_GUESS) {
1610       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1611       its--;
1612     }
1613 
1614     while (its--) {
1615       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1616       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1617 
1618       /* update rhs: bb1 = bb - B*x */
1619       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1620       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1621 
1622       /* local sweep */
1623       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1624     }
1625   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1626     if (flag & SOR_ZERO_INITIAL_GUESS) {
1627       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1628       its--;
1629     }
1630     while (its--) {
1631       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1632       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1633 
1634       /* update rhs: bb1 = bb - B*x */
1635       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1636       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1637 
1638       /* local sweep */
1639       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1640     }
1641   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1642     if (flag & SOR_ZERO_INITIAL_GUESS) {
1643       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1644       its--;
1645     }
1646     while (its--) {
1647       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1648       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1649 
1650       /* update rhs: bb1 = bb - B*x */
1651       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1652       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1653 
1654       /* local sweep */
1655       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1656     }
1657   }  else if (flag & SOR_EISENSTAT) {
1658     Vec         xx1;
1659 
1660     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1661     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1662 
1663     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1665     if (!mat->diag) {
1666       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1667       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1668     }
1669     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1670     if (hasop) {
1671       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1672     } else {
1673       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1674     }
1675     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1676 
1677     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1678 
1679     /* local sweep */
1680     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1681     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1682     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1683   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1684 
1685   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatPermute_MPIAIJ"
1691 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1692 {
1693   MPI_Comm       comm;
1694   PetscInt       first,local_rowsize,local_colsize;
1695   const PetscInt *rows;
1696   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1697   PetscErrorCode ierr;
1698 
1699   PetscFunctionBegin;
1700   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1701   /* make a collective version of 'rowp'; this is to be tolerant of users who pass serial index sets */
1702   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1703   /* collect the global row permutation and invert it */
1704   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1705   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1706   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1707   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1708   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1709   /* get the local target indices */
1710   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1711   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1712   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1713   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1714   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1715   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1716   /* the column permutation is so much easier;
1717      make a local version of 'colp' and invert it */
1718   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1719   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1720   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1721   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1722   /* now we just get the submatrix */
1723   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1724   /* clean up */
1725   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1726   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1732 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1733 {
1734   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1735   Mat            A = mat->A,B = mat->B;
1736   PetscErrorCode ierr;
1737   PetscReal      isend[5],irecv[5];
1738 
1739   PetscFunctionBegin;
1740   info->block_size     = 1.0;
1741   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1742   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1743   isend[3] = info->memory;  isend[4] = info->mallocs;
1744   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1745   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1746   isend[3] += info->memory;  isend[4] += info->mallocs;
1747   if (flag == MAT_LOCAL) {
1748     info->nz_used      = isend[0];
1749     info->nz_allocated = isend[1];
1750     info->nz_unneeded  = isend[2];
1751     info->memory       = isend[3];
1752     info->mallocs      = isend[4];
1753   } else if (flag == MAT_GLOBAL_MAX) {
1754     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1755     info->nz_used      = irecv[0];
1756     info->nz_allocated = irecv[1];
1757     info->nz_unneeded  = irecv[2];
1758     info->memory       = irecv[3];
1759     info->mallocs      = irecv[4];
1760   } else if (flag == MAT_GLOBAL_SUM) {
1761     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1762     info->nz_used      = irecv[0];
1763     info->nz_allocated = irecv[1];
1764     info->nz_unneeded  = irecv[2];
1765     info->memory       = irecv[3];
1766     info->mallocs      = irecv[4];
1767   }
1768   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1769   info->fill_ratio_needed = 0;
1770   info->factor_mallocs    = 0;
1771 
1772   PetscFunctionReturn(0);
1773 }
1774 
1775 #undef __FUNCT__
1776 #define __FUNCT__ "MatSetOption_MPIAIJ"
1777 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1778 {
1779   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1780   PetscErrorCode ierr;
1781 
1782   PetscFunctionBegin;
1783   switch (op) {
1784   case MAT_NEW_NONZERO_LOCATIONS:
1785   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1786   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1787   case MAT_KEEP_NONZERO_PATTERN:
1788   case MAT_NEW_NONZERO_LOCATION_ERR:
1789   case MAT_USE_INODES:
1790   case MAT_IGNORE_ZERO_ENTRIES:
1791     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1792     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1793     break;
1794   case MAT_ROW_ORIENTED:
1795     a->roworiented = flg;
1796     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1797     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1798     break;
1799   case MAT_NEW_DIAGONALS:
1800     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1801     break;
1802   case MAT_IGNORE_OFF_PROC_ENTRIES:
1803     a->donotstash = flg;
1804     break;
1805   case MAT_SPD:
1806     A->spd_set                         = PETSC_TRUE;
1807     A->spd                             = flg;
1808     if (flg) {
1809       A->symmetric                     = PETSC_TRUE;
1810       A->structurally_symmetric        = PETSC_TRUE;
1811       A->symmetric_set                 = PETSC_TRUE;
1812       A->structurally_symmetric_set    = PETSC_TRUE;
1813     }
1814     break;
1815   case MAT_SYMMETRIC:
1816     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1817     break;
1818   case MAT_STRUCTURALLY_SYMMETRIC:
1819     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1820     break;
1821   case MAT_HERMITIAN:
1822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1823     break;
1824   case MAT_SYMMETRY_ETERNAL:
1825     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1826     break;
1827   default:
1828     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1829   }
1830   PetscFunctionReturn(0);
1831 }
1832 
1833 #undef __FUNCT__
1834 #define __FUNCT__ "MatGetRow_MPIAIJ"
1835 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1836 {
1837   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1838   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1839   PetscErrorCode ierr;
1840   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1841   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1842   PetscInt       *cmap,*idx_p;
1843 
1844   PetscFunctionBegin;
1845   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1846   mat->getrowactive = PETSC_TRUE;
1847 
1848   if (!mat->rowvalues && (idx || v)) {
1849     /*
1850         allocate enough space to hold information from the longest row.
1851     */
1852     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1853     PetscInt   max = 1,tmp;
1854     for (i=0; i<matin->rmap->n; i++) {
1855       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1856       if (max < tmp) { max = tmp; }
1857     }
1858     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1859   }
1860 
1861   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1862   lrow = row - rstart;
1863 
1864   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1865   if (!v)   {pvA = 0; pvB = 0;}
1866   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1867   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1868   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1869   nztot = nzA + nzB;
1870 
1871   cmap  = mat->garray;
1872   if (v  || idx) {
1873     if (nztot) {
1874       /* Sort by increasing column numbers, assuming A and B already sorted */
1875       PetscInt imark = -1;
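      /* imark counts the leading B (off-diagonal) entries whose global column lies before the diagonal
         block; those come first, then all of A's entries, then the remaining B entries, so the returned
         row is sorted by global column number. */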
1876       if (v) {
1877         *v = v_p = mat->rowvalues;
1878         for (i=0; i<nzB; i++) {
1879           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1880           else break;
1881         }
1882         imark = i;
1883         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1884         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1885       }
1886       if (idx) {
1887         *idx = idx_p = mat->rowindices;
1888         if (imark > -1) {
1889           for (i=0; i<imark; i++) {
1890             idx_p[i] = cmap[cworkB[i]];
1891           }
1892         } else {
1893           for (i=0; i<nzB; i++) {
1894             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1895             else break;
1896           }
1897           imark = i;
1898         }
1899         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1900         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1901       }
1902     } else {
1903       if (idx) *idx = 0;
1904       if (v)   *v   = 0;
1905     }
1906   }
1907   *nz = nztot;
1908   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1909   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1910   PetscFunctionReturn(0);
1911 }
1912 
1913 #undef __FUNCT__
1914 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1915 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1916 {
1917   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1918 
1919   PetscFunctionBegin;
1920   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1921   aij->getrowactive = PETSC_FALSE;
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 #undef __FUNCT__
1926 #define __FUNCT__ "MatNorm_MPIAIJ"
1927 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1928 {
1929   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1930   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1931   PetscErrorCode ierr;
1932   PetscInt       i,j,cstart = mat->cmap->rstart;
1933   PetscReal      sum = 0.0;
1934   MatScalar      *v;
1935 
1936   PetscFunctionBegin;
1937   if (aij->size == 1) {
1938     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1939   } else {
1940     if (type == NORM_FROBENIUS) {
1941       v = amat->a;
1942       for (i=0; i<amat->nz; i++) {
1943 #if defined(PETSC_USE_COMPLEX)
1944         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1945 #else
1946         sum += (*v)*(*v); v++;
1947 #endif
1948       }
1949       v = bmat->a;
1950       for (i=0; i<bmat->nz; i++) {
1951 #if defined(PETSC_USE_COMPLEX)
1952         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1953 #else
1954         sum += (*v)*(*v); v++;
1955 #endif
1956       }
1957       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1958       *norm = PetscSqrtReal(*norm);
1959     } else if (type == NORM_1) { /* max column norm */
1960       PetscReal *tmp,*tmp2;
1961       PetscInt  *jj,*garray = aij->garray;
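      /* Accumulate |a_ij| into a length-N array indexed by global column, sum these partial column
         sums across all processes, and take the largest entry: norm_1(A) = max_j sum_i |a_ij|. */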
1962       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1963       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1964       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1965       *norm = 0.0;
1966       v = amat->a; jj = amat->j;
1967       for (j=0; j<amat->nz; j++) {
1968         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1969       }
1970       v = bmat->a; jj = bmat->j;
1971       for (j=0; j<bmat->nz; j++) {
1972         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1973       }
1974       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1975       for (j=0; j<mat->cmap->N; j++) {
1976         if (tmp2[j] > *norm) *norm = tmp2[j];
1977       }
1978       ierr = PetscFree(tmp);CHKERRQ(ierr);
1979       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1980     } else if (type == NORM_INFINITY) { /* max row norm */
1981       PetscReal ntemp = 0.0;
1982       for (j=0; j<aij->A->rmap->n; j++) {
1983         v = amat->a + amat->i[j];
1984         sum = 0.0;
1985         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1986           sum += PetscAbsScalar(*v); v++;
1987         }
1988         v = bmat->a + bmat->i[j];
1989         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1990           sum += PetscAbsScalar(*v); v++;
1991         }
1992         if (sum > ntemp) ntemp = sum;
1993       }
1994       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1995     } else {
1996       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1997     }
1998   }
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 #undef __FUNCT__
2003 #define __FUNCT__ "MatTranspose_MPIAIJ"
2004 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2005 {
2006   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2007   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2008   PetscErrorCode ierr;
2009   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2010   PetscInt       cstart=A->cmap->rstart,ncol;
2011   Mat            B;
2012   MatScalar      *array;
2013 
2014   PetscFunctionBegin;
2015   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2016 
2017   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2018   ai = Aloc->i; aj = Aloc->j;
2019   bi = Bloc->i; bj = Bloc->j;
2020   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2021     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2022     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2023     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2024     for (i=0; i<ai[ma]; i++){
2025       d_nnz[aj[i]] ++;
2026       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2027     }
2028 
2029     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2030     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2031     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2032     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2033     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2034     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2035   } else {
2036     B = *matout;
2037   }
2038 
2039   /* copy over the A part */
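  /* MatSetValues() is called with ncol row indices (aj, the global column indices of local row i of A)
     and a single column index (&row), so value k lands at global position (aj[k],row) in B; each local
     row of A thus becomes a column of the transpose. */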
2040   array = Aloc->a;
2041   row = A->rmap->rstart;
2042   for (i=0; i<ma; i++) {
2043     ncol = ai[i+1]-ai[i];
2044     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2045     row++; array += ncol; aj += ncol;
2046   }
2047   aj = Aloc->j;
2048   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2049 
2050   /* copy over the B part */
2051   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2052   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2053   array = Bloc->a;
2054   row = A->rmap->rstart;
2055   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2056   cols_tmp = cols;
2057   for (i=0; i<mb; i++) {
2058     ncol = bi[i+1]-bi[i];
2059     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2060     row++; array += ncol; cols_tmp += ncol;
2061   }
2062   ierr = PetscFree(cols);CHKERRQ(ierr);
2063 
2064   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2065   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2066   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2067     *matout = B;
2068   } else {
2069     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2070   }
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 #undef __FUNCT__
2075 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2076 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2077 {
2078   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2079   Mat            a = aij->A,b = aij->B;
2080   PetscErrorCode ierr;
2081   PetscInt       s1,s2,s3;
2082 
2083   PetscFunctionBegin;
2084   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2085   if (rr) {
2086     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2087     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2088     /* Overlap communication with computation. */
2089     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2090   }
2091   if (ll) {
2092     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2093     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2094     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2095   }
2096   /* scale the diagonal block */
2097   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2098 
2099   if (rr) {
2100     /* Do a scatter end and then right scale the off-diagonal block */
2101     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2102     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2103   }
2104 
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2110 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2111 {
2112   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2113   PetscErrorCode ierr;
2114 
2115   PetscFunctionBegin;
2116   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2117   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2118   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2119   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2120   PetscFunctionReturn(0);
2121 }
2122 #undef __FUNCT__
2123 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2124 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2125 {
2126   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2127   PetscErrorCode ierr;
2128 
2129   PetscFunctionBegin;
2130   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatEqual_MPIAIJ"
2136 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2137 {
2138   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2139   Mat            a,b,c,d;
2140   PetscBool      flg;
2141   PetscErrorCode ierr;
2142 
2143   PetscFunctionBegin;
2144   a = matA->A; b = matA->B;
2145   c = matB->A; d = matB->B;
2146 
2147   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2148   if (flg) {
2149     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2150   }
2151   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2152   PetscFunctionReturn(0);
2153 }
2154 
2155 #undef __FUNCT__
2156 #define __FUNCT__ "MatCopy_MPIAIJ"
2157 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2158 {
2159   PetscErrorCode ierr;
2160   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2161   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2162 
2163   PetscFunctionBegin;
2164   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2165   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2166     /* because of the column compression in the off-processor part of the matrix a->B,
2167        the number of columns in a->B and b->B may be different, hence we cannot call
2168        MatCopy() directly on the two parts. If need be, we can provide a more
2169        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2170        then copying the submatrices */
2171     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2172   } else {
2173     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2174     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2175   }
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 #undef __FUNCT__
2180 #define __FUNCT__ "MatSetUp_MPIAIJ"
2181 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2182 {
2183   PetscErrorCode ierr;
2184 
2185   PetscFunctionBegin;
2186   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2187   PetscFunctionReturn(0);
2188 }
2189 
2190 #undef __FUNCT__
2191 #define __FUNCT__ "MatAXPY_MPIAIJ"
2192 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2193 {
2194   PetscErrorCode ierr;
2195   PetscInt       i;
2196   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2197   PetscBLASInt   bnz,one=1;
2198   Mat_SeqAIJ     *x,*y;
2199 
2200   PetscFunctionBegin;
2201   if (str == SAME_NONZERO_PATTERN) {
2202     PetscScalar alpha = a;
2203     x = (Mat_SeqAIJ *)xx->A->data;
2204     y = (Mat_SeqAIJ *)yy->A->data;
2205     bnz = PetscBLASIntCast(x->nz);
2206     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2207     x = (Mat_SeqAIJ *)xx->B->data;
2208     y = (Mat_SeqAIJ *)yy->B->data;
2209     bnz = PetscBLASIntCast(x->nz);
2210     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2211   } else if (str == SUBSET_NONZERO_PATTERN) {
2212     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2213 
2214     x = (Mat_SeqAIJ *)xx->B->data;
2215     y = (Mat_SeqAIJ *)yy->B->data;
2216     if (y->xtoy && y->XtoY != xx->B) {
2217       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2218       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2219     }
2220     if (!y->xtoy) { /* get xtoy */
2221       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2222       y->XtoY = xx->B;
2223       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2224     }
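    /* y->xtoy[i] gives the position within Y's off-diagonal block of the entry with the same (row,
       global column) as nonzero i of X's off-diagonal block; the map is cached on Y (together with a
       reference to xx->B) and rebuilt above only when a different X is encountered. */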
2225     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2226   } else {
2227     Mat B;
2228     PetscInt *nnz_d,*nnz_o;
2229     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2230     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2231     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2232     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2233     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2234     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2235     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2236     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2237     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2238     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2239     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2240     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2241     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2242   }
2243   PetscFunctionReturn(0);
2244 }
2245 
2246 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2247 
2248 #undef __FUNCT__
2249 #define __FUNCT__ "MatConjugate_MPIAIJ"
2250 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2251 {
2252 #if defined(PETSC_USE_COMPLEX)
2253   PetscErrorCode ierr;
2254   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2255 
2256   PetscFunctionBegin;
2257   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2258   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2259 #else
2260   PetscFunctionBegin;
2261 #endif
2262   PetscFunctionReturn(0);
2263 }
2264 
2265 #undef __FUNCT__
2266 #define __FUNCT__ "MatRealPart_MPIAIJ"
2267 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2268 {
2269   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2270   PetscErrorCode ierr;
2271 
2272   PetscFunctionBegin;
2273   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2274   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2275   PetscFunctionReturn(0);
2276 }
2277 
2278 #undef __FUNCT__
2279 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2280 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2281 {
2282   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2283   PetscErrorCode ierr;
2284 
2285   PetscFunctionBegin;
2286   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2287   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 #ifdef PETSC_HAVE_PBGL
2292 
2293 #include <boost/parallel/mpi/bsp_process_group.hpp>
2294 #include <boost/graph/distributed/ilu_default_graph.hpp>
2295 #include <boost/graph/distributed/ilu_0_block.hpp>
2296 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2297 #include <boost/graph/distributed/petsc/interface.hpp>
2298 #include <boost/multi_array.hpp>
2299 #include <boost/parallel/distributed_property_map.hpp>
2300 
2301 #undef __FUNCT__
2302 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2303 /*
2304   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2305 */
2306 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2307 {
2308   namespace petsc = boost::distributed::petsc;
2309 
2310   namespace graph_dist = boost::graph::distributed;
2311   using boost::graph::distributed::ilu_default::process_group_type;
2312   using boost::graph::ilu_permuted;
2313 
2314   PetscBool       row_identity, col_identity;
2315   PetscContainer  c;
2316   PetscInt        m, n, M, N;
2317   PetscErrorCode  ierr;
2318 
2319   PetscFunctionBegin;
2320   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2321   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2322   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2323   if (!row_identity || !col_identity) {
2324     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2325   }
2326 
2327   process_group_type pg;
2328   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2329   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2330   lgraph_type&   level_graph = *lgraph_p;
2331   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2332 
2333   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2334   ilu_permuted(level_graph);
2335 
2336   /* put together the new matrix */
2337   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2338   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2339   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2340   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2341   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2342   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2343   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2344 
2345   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2346   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2347   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2348   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2349   PetscFunctionReturn(0);
2350 }
2351 
2352 #undef __FUNCT__
2353 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2354 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2355 {
2356   PetscFunctionBegin;
2357   PetscFunctionReturn(0);
2358 }
2359 
2360 #undef __FUNCT__
2361 #define __FUNCT__ "MatSolve_MPIAIJ"
2362 /*
2363   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2364 */
2365 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2366 {
2367   namespace graph_dist = boost::graph::distributed;
2368 
2369   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2370   lgraph_type*   lgraph_p;
2371   PetscContainer c;
2372   PetscErrorCode ierr;
2373 
2374   PetscFunctionBegin;
2375   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2376   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2377   ierr = VecCopy(b, x);CHKERRQ(ierr);
2378 
2379   PetscScalar* array_x;
2380   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2381   PetscInt sx;
2382   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2383 
2384   PetscScalar* array_b;
2385   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2386   PetscInt sb;
2387   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2388 
2389   lgraph_type&   level_graph = *lgraph_p;
2390   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2391 
2392   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2393   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2394                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2395 
2396   typedef boost::iterator_property_map<array_ref_type::iterator,
2397                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2398   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2399                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2400 
2401   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2402 
2403   PetscFunctionReturn(0);
2404 }
2405 #endif
2406 
2407 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2408   PetscInt       nzlocal,nsends,nrecvs;
2409   PetscMPIInt    *send_rank,*recv_rank;
2410   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2411   PetscScalar    *sbuf_a,**rbuf_a;
2412   PetscErrorCode (*Destroy)(Mat);
2413 } Mat_Redundant;
2414 
2415 #undef __FUNCT__
2416 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2417 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2418 {
2419   PetscErrorCode       ierr;
2420   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2421   PetscInt             i;
2422 
2423   PetscFunctionBegin;
2424   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2425   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2426   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2427   for (i=0; i<redund->nrecvs; i++){
2428     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2429     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2430   }
2431   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2432   ierr = PetscFree(redund);CHKERRQ(ierr);
2433   PetscFunctionReturn(0);
2434 }
2435 
2436 #undef __FUNCT__
2437 #define __FUNCT__ "MatDestroy_MatRedundant"
2438 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2439 {
2440   PetscErrorCode  ierr;
2441   PetscContainer  container;
2442   Mat_Redundant   *redund=PETSC_NULL;
2443 
2444   PetscFunctionBegin;
2445   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2446   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2447   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2448   A->ops->destroy = redund->Destroy;
2449   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2450   if (A->ops->destroy) {
2451     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2452   }
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2458 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2459 {
2460   PetscMPIInt    rank,size;
2461   MPI_Comm       comm=((PetscObject)mat)->comm;
2462   PetscErrorCode ierr;
2463   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2464   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2465   PetscInt       *rowrange=mat->rmap->range;
2466   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2467   Mat            A=aij->A,B=aij->B,C=*matredundant;
2468   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2469   PetscScalar    *sbuf_a;
2470   PetscInt       nzlocal=a->nz+b->nz;
2471   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2472   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2473   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2474   MatScalar      *aworkA,*aworkB;
2475   PetscScalar    *vals;
2476   PetscMPIInt    tag1,tag2,tag3,imdex;
2477   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2478                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2479   MPI_Status     recv_status,*send_status;
2480   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2481   PetscInt       **rbuf_j=PETSC_NULL;
2482   PetscScalar    **rbuf_a=PETSC_NULL;
2483   Mat_Redundant  *redund=PETSC_NULL;
2484   PetscContainer container;
2485 
2486   PetscFunctionBegin;
2487   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2488   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2489 
2490   if (reuse == MAT_REUSE_MATRIX) {
2491     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2492     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2493     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2494     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2495     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2496     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2497     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2498     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2499 
2500     nsends    = redund->nsends;
2501     nrecvs    = redund->nrecvs;
2502     send_rank = redund->send_rank;
2503     recv_rank = redund->recv_rank;
2504     sbuf_nz   = redund->sbuf_nz;
2505     rbuf_nz   = redund->rbuf_nz;
2506     sbuf_j    = redund->sbuf_j;
2507     sbuf_a    = redund->sbuf_a;
2508     rbuf_j    = redund->rbuf_j;
2509     rbuf_a    = redund->rbuf_a;
2510   }
2511 
2512   if (reuse == MAT_INITIAL_MATRIX){
2513     PetscMPIInt  subrank,subsize;
2514     PetscInt     nleftover,np_subcomm;
2515     /* get the destination processors' id send_rank, nsends and nrecvs */
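    /* Exchange partners: this process sends its local rows to every global rank i (i != rank) whose
       position i/nsubcomm matches this process's subrank, and expects to receive from the same set;
       the code below folds in the leftover ranks when size is not evenly divisible by nsubcomm. */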
2516     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2517     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2518     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2519     np_subcomm = size/nsubcomm;
2520     nleftover  = size - nsubcomm*np_subcomm;
2521     nsends = 0; nrecvs = 0;
2522     for (i=0; i<size; i++){ /* i=rank*/
2523       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2524         send_rank[nsends] = i; nsends++;
2525         recv_rank[nrecvs++] = i;
2526       }
2527     }
2528     if (rank >= size - nleftover){/* this proc is a leftover processor */
2529       i = size-nleftover-1;
2530       j = 0;
2531       while (j < nsubcomm - nleftover){
2532         send_rank[nsends++] = i;
2533         i--; j++;
2534       }
2535     }
2536 
2537     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2538       for (i=0; i<nleftover; i++){
2539         recv_rank[nrecvs++] = size-nleftover+i;
2540       }
2541     }
2542 
2543     /* allocate sbuf_j, sbuf_a */
2544     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2545     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2546     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2547   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2548 
2549   /* copy mat's local entries into the buffers */
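  /* Buffer layout: sbuf_j = [ row offsets rptr[0..rend-rstart] | global column indices, row by row ]
     and sbuf_a holds the matching values in the same order; within each row the entries are written
     lower off-diagonal (B), diagonal block (A), upper off-diagonal (B), i.e. sorted by global column. */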
2550   if (reuse == MAT_INITIAL_MATRIX){
2551     rownz_max = 0;
2552     rptr = sbuf_j;
2553     cols = sbuf_j + rend-rstart + 1;
2554     vals = sbuf_a;
2555     rptr[0] = 0;
2556     for (i=0; i<rend-rstart; i++){
2557       row = i + rstart;
2558       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2559       ncols  = nzA + nzB;
2560       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2561       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2562       /* load the column indices for this row into cols */
2563       lwrite = 0;
2564       for (l=0; l<nzB; l++) {
2565         if ((ctmp = bmap[cworkB[l]]) < cstart){
2566           vals[lwrite]   = aworkB[l];
2567           cols[lwrite++] = ctmp;
2568         }
2569       }
2570       for (l=0; l<nzA; l++){
2571         vals[lwrite]   = aworkA[l];
2572         cols[lwrite++] = cstart + cworkA[l];
2573       }
2574       for (l=0; l<nzB; l++) {
2575         if ((ctmp = bmap[cworkB[l]]) >= cend){
2576           vals[lwrite]   = aworkB[l];
2577           cols[lwrite++] = ctmp;
2578         }
2579       }
2580       vals += ncols;
2581       cols += ncols;
2582       rptr[i+1] = rptr[i] + ncols;
2583       if (rownz_max < ncols) rownz_max = ncols;
2584     }
2585     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2586   } else { /* only copy matrix values into sbuf_a */
2587     rptr = sbuf_j;
2588     vals = sbuf_a;
2589     rptr[0] = 0;
2590     for (i=0; i<rend-rstart; i++){
2591       row = i + rstart;
2592       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2593       ncols  = nzA + nzB;
2594       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2595       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2596       lwrite = 0;
2597       for (l=0; l<nzB; l++) {
2598         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2599       }
2600       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2601       for (l=0; l<nzB; l++) {
2602         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2603       }
2604       vals += ncols;
2605       rptr[i+1] = rptr[i] + ncols;
2606     }
2607   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2608 
2609   /* send nzlocal to others, and recv others' nzlocal */
2610   /*--------------------------------------------------*/
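  /* Three tags are used: tag1 carries the nonzero counts, tag2 the packed row offsets plus column
     indices (sbuf_j), and tag3 the numerical values (sbuf_a).  The tag1/tag2 exchanges and the
     receive-buffer allocations are done only for MAT_INITIAL_MATRIX; a reused matrix resends only the
     values over tag3. */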
2611   if (reuse == MAT_INITIAL_MATRIX){
2612     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2613     s_waits2 = s_waits3 + nsends;
2614     s_waits1 = s_waits2 + nsends;
2615     r_waits1 = s_waits1 + nsends;
2616     r_waits2 = r_waits1 + nrecvs;
2617     r_waits3 = r_waits2 + nrecvs;
2618   } else {
2619     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2620     r_waits3 = s_waits3 + nsends;
2621   }
2622 
2623   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2624   if (reuse == MAT_INITIAL_MATRIX){
2625     /* get new tags to keep the communication clean */
2626     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2627     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2628     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2629 
2630     /* post receives of other's nzlocal */
2631     for (i=0; i<nrecvs; i++){
2632       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2633     }
2634     /* send nzlocal to others */
2635     for (i=0; i<nsends; i++){
2636       sbuf_nz[i] = nzlocal;
2637       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2638     }
2639     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2640     count = nrecvs;
2641     while (count) {
2642       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2643       recv_rank[imdex] = recv_status.MPI_SOURCE;
2644       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2645       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2646 
2647       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2648       rbuf_nz[imdex] += i + 2;
2649       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2650       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2651       count--;
2652     }
2653     /* wait on sends of nzlocal */
2654     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2655     /* send mat->i,j to others, and recv from other's */
2656     /*------------------------------------------------*/
2657     for (i=0; i<nsends; i++){
2658       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2659       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2660     }
2661     /* wait on receives of mat->i,j */
2662     /*------------------------------*/
2663     count = nrecvs;
2664     while (count) {
2665       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2666       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2667       count--;
2668     }
2669     /* wait on sends of mat->i,j */
2670     /*---------------------------*/
2671     if (nsends) {
2672       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2673     }
2674   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2675 
2676   /* post receives, send and receive mat->a */
2677   /*----------------------------------------*/
2678   for (imdex=0; imdex<nrecvs; imdex++) {
2679     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2680   }
2681   for (i=0; i<nsends; i++){
2682     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2683   }
2684   count = nrecvs;
2685   while (count) {
2686     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2687     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2688     count--;
2689   }
2690   if (nsends) {
2691     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2692   }
2693 
2694   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2695 
2696   /* create redundant matrix */
2697   /*-------------------------*/
2698   if (reuse == MAT_INITIAL_MATRIX){
2699     /* compute rownz_max for preallocation */
2700     for (imdex=0; imdex<nrecvs; imdex++){
2701       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2702       rptr = rbuf_j[imdex];
2703       for (i=0; i<j; i++){
2704         ncols = rptr[i+1] - rptr[i];
2705         if (rownz_max < ncols) rownz_max = ncols;
2706       }
2707     }
2708 
2709     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2710     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2711     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2712     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2713     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2714   } else {
2715     C = *matredundant;
2716   }
2717 
2718   /* insert local matrix entries */
2719   rptr = sbuf_j;
2720   cols = sbuf_j + rend-rstart + 1;
2721   vals = sbuf_a;
2722   for (i=0; i<rend-rstart; i++){
2723     row   = i + rstart;
2724     ncols = rptr[i+1] - rptr[i];
2725     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2726     vals += ncols;
2727     cols += ncols;
2728   }
2729   /* insert received matrix entries */
2730   for (imdex=0; imdex<nrecvs; imdex++){
2731     rstart = rowrange[recv_rank[imdex]];
2732     rend   = rowrange[recv_rank[imdex]+1];
2733     rptr = rbuf_j[imdex];
2734     cols = rbuf_j[imdex] + rend-rstart + 1;
2735     vals = rbuf_a[imdex];
2736     for (i=0; i<rend-rstart; i++){
2737       row   = i + rstart;
2738       ncols = rptr[i+1] - rptr[i];
2739       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2740       vals += ncols;
2741       cols += ncols;
2742     }
2743   }
2744   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2745   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2746   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2747   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2748   if (reuse == MAT_INITIAL_MATRIX) {
2749     PetscContainer container;
2750     *matredundant = C;
2751     /* create a supporting struct and attach it to C for reuse */
2752     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2753     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2754     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2755     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2756     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2757     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2758 
2759     redund->nzlocal = nzlocal;
2760     redund->nsends  = nsends;
2761     redund->nrecvs  = nrecvs;
2762     redund->send_rank = send_rank;
2763     redund->recv_rank = recv_rank;
2764     redund->sbuf_nz = sbuf_nz;
2765     redund->rbuf_nz = rbuf_nz;
2766     redund->sbuf_j  = sbuf_j;
2767     redund->sbuf_a  = sbuf_a;
2768     redund->rbuf_j  = rbuf_j;
2769     redund->rbuf_a  = rbuf_a;
2770 
2771     redund->Destroy = C->ops->destroy;
2772     C->ops->destroy = MatDestroy_MatRedundant;
2773   }
2774   PetscFunctionReturn(0);
2775 }
2776 
2777 #undef __FUNCT__
2778 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2779 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2780 {
2781   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2782   PetscErrorCode ierr;
2783   PetscInt       i,*idxb = 0;
2784   PetscScalar    *va,*vb;
2785   Vec            vtmp;
2786 
2787   PetscFunctionBegin;
2788   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2789   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2790   if (idx) {
2791     for (i=0; i<A->rmap->n; i++) {
2792       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2793     }
2794   }
2795 
2796   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2797   if (idx) {
2798     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2799   }
2800   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2801   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2802 
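  /* merge: replace the diagonal-block result wherever the off-diagonal block has a larger magnitude,
     mapping its local column index back to a global one through garray */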
2803   for (i=0; i<A->rmap->n; i++){
2804     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2805       va[i] = vb[i];
2806       if (idx) idx[i] = a->garray[idxb[i]];
2807     }
2808   }
2809 
2810   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2811   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2812   ierr = PetscFree(idxb);CHKERRQ(ierr);
2813   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2814   PetscFunctionReturn(0);
2815 }
2816 
2817 #undef __FUNCT__
2818 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2819 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2820 {
2821   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2822   PetscErrorCode ierr;
2823   PetscInt       i,*idxb = 0;
2824   PetscScalar    *va,*vb;
2825   Vec            vtmp;
2826 
2827   PetscFunctionBegin;
2828   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2829   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2830   if (idx) {
2831     for (i=0; i<A->rmap->n; i++) {
2832       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2833     }
2834   }
2835 
2836   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2837   if (idx) {
2838     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2839   }
2840   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2841   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2842 
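  /* merge: keep whichever of the two block results has the smaller magnitude for each row */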
2843   for (i=0; i<A->rmap->n; i++){
2844     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2845       va[i] = vb[i];
2846       if (idx) idx[i] = a->garray[idxb[i]];
2847     }
2848   }
2849 
2850   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2851   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2852   ierr = PetscFree(idxb);CHKERRQ(ierr);
2853   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2854   PetscFunctionReturn(0);
2855 }
2856 
2857 #undef __FUNCT__
2858 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2859 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2860 {
2861   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2862   PetscInt       n      = A->rmap->n;
2863   PetscInt       cstart = A->cmap->rstart;
2864   PetscInt      *cmap   = mat->garray;
2865   PetscInt      *diagIdx, *offdiagIdx;
2866   Vec            diagV, offdiagV;
2867   PetscScalar   *a, *diagA, *offdiagA;
2868   PetscInt       r;
2869   PetscErrorCode ierr;
2870 
2871   PetscFunctionBegin;
2872   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2873   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2874   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2875   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2876   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2877   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2878   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2879   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2880   for(r = 0; r < n; ++r) {
2881     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2882       a[r]   = diagA[r];
2883       idx[r] = cstart + diagIdx[r];
2884     } else {
2885       a[r]   = offdiagA[r];
2886       idx[r] = cmap[offdiagIdx[r]];
2887     }
2888   }
2889   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2890   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2891   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2892   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2893   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2894   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2895   PetscFunctionReturn(0);
2896 }
2897 
2898 #undef __FUNCT__
2899 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2900 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2901 {
2902   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2903   PetscInt       n      = A->rmap->n;
2904   PetscInt       cstart = A->cmap->rstart;
2905   PetscInt      *cmap   = mat->garray;
2906   PetscInt      *diagIdx, *offdiagIdx;
2907   Vec            diagV, offdiagV;
2908   PetscScalar   *a, *diagA, *offdiagA;
2909   PetscInt       r;
2910   PetscErrorCode ierr;
2911 
2912   PetscFunctionBegin;
2913   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2914   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2915   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2916   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2917   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2918   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2919   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2920   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2921   for(r = 0; r < n; ++r) {
2922     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2923       a[r]   = diagA[r];
2924       idx[r] = cstart + diagIdx[r];
2925     } else {
2926       a[r]   = offdiagA[r];
2927       idx[r] = cmap[offdiagIdx[r]];
2928     }
2929   }
2930   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2931   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2932   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2933   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2934   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2935   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2936   PetscFunctionReturn(0);
2937 }
2938 
2939 #undef __FUNCT__
2940 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2941 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2942 {
2943   PetscErrorCode ierr;
2944   Mat            *dummy;
2945 
2946   PetscFunctionBegin;
2947   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2948   *newmat = *dummy;
2949   ierr = PetscFree(dummy);CHKERRQ(ierr);
2950   PetscFunctionReturn(0);
2951 }
2952 
2953 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2954 
2955 #undef __FUNCT__
2956 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2957 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2958 {
2959   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2960   PetscErrorCode ierr;
2961 
2962   PetscFunctionBegin;
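  /* the point-block diagonal of the parallel matrix lies entirely in the local diagonal block a->A,
     so inverting it there is sufficient */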
2963   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2964   PetscFunctionReturn(0);
2965 }
2966 
2967 
2968 /* -------------------------------------------------------------------*/
2969 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2970        MatGetRow_MPIAIJ,
2971        MatRestoreRow_MPIAIJ,
2972        MatMult_MPIAIJ,
2973 /* 4*/ MatMultAdd_MPIAIJ,
2974        MatMultTranspose_MPIAIJ,
2975        MatMultTransposeAdd_MPIAIJ,
2976 #ifdef PETSC_HAVE_PBGL
2977        MatSolve_MPIAIJ,
2978 #else
2979        0,
2980 #endif
2981        0,
2982        0,
2983 /*10*/ 0,
2984        0,
2985        0,
2986        MatSOR_MPIAIJ,
2987        MatTranspose_MPIAIJ,
2988 /*15*/ MatGetInfo_MPIAIJ,
2989        MatEqual_MPIAIJ,
2990        MatGetDiagonal_MPIAIJ,
2991        MatDiagonalScale_MPIAIJ,
2992        MatNorm_MPIAIJ,
2993 /*20*/ MatAssemblyBegin_MPIAIJ,
2994        MatAssemblyEnd_MPIAIJ,
2995        MatSetOption_MPIAIJ,
2996        MatZeroEntries_MPIAIJ,
2997 /*24*/ MatZeroRows_MPIAIJ,
2998        0,
2999 #ifdef PETSC_HAVE_PBGL
3000        0,
3001 #else
3002        0,
3003 #endif
3004        0,
3005        0,
3006 /*29*/ MatSetUp_MPIAIJ,
3007 #ifdef PETSC_HAVE_PBGL
3008        0,
3009 #else
3010        0,
3011 #endif
3012        0,
3013        0,
3014        0,
3015 /*34*/ MatDuplicate_MPIAIJ,
3016        0,
3017        0,
3018        0,
3019        0,
3020 /*39*/ MatAXPY_MPIAIJ,
3021        MatGetSubMatrices_MPIAIJ,
3022        MatIncreaseOverlap_MPIAIJ,
3023        MatGetValues_MPIAIJ,
3024        MatCopy_MPIAIJ,
3025 /*44*/ MatGetRowMax_MPIAIJ,
3026        MatScale_MPIAIJ,
3027        0,
3028        0,
3029        MatZeroRowsColumns_MPIAIJ,
3030 /*49*/ MatSetBlockSize_MPIAIJ,
3031        0,
3032        0,
3033        0,
3034        0,
3035 /*54*/ MatFDColoringCreate_MPIAIJ,
3036        0,
3037        MatSetUnfactored_MPIAIJ,
3038        MatPermute_MPIAIJ,
3039        0,
3040 /*59*/ MatGetSubMatrix_MPIAIJ,
3041        MatDestroy_MPIAIJ,
3042        MatView_MPIAIJ,
3043        0,
3044        0,
3045 /*64*/ 0,
3046        0,
3047        0,
3048        0,
3049        0,
3050 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3051        MatGetRowMinAbs_MPIAIJ,
3052        0,
3053        MatSetColoring_MPIAIJ,
3054 #if defined(PETSC_HAVE_ADIC)
3055        MatSetValuesAdic_MPIAIJ,
3056 #else
3057        0,
3058 #endif
3059        MatSetValuesAdifor_MPIAIJ,
3060 /*75*/ MatFDColoringApply_AIJ,
3061        0,
3062        0,
3063        0,
3064        0,
3065 /*80*/ 0,
3066        0,
3067        0,
3068 /*83*/ MatLoad_MPIAIJ,
3069        0,
3070        0,
3071        0,
3072        0,
3073        0,
3074 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3075        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3076        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3077        MatPtAP_Basic,
3078        MatPtAPSymbolic_MPIAIJ,
3079 /*94*/ MatPtAPNumeric_MPIAIJ,
3080        0,
3081        0,
3082        0,
3083        0,
3084 /*99*/ 0,
3085        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3086        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3087        MatConjugate_MPIAIJ,
3088        0,
3089 /*104*/MatSetValuesRow_MPIAIJ,
3090        MatRealPart_MPIAIJ,
3091        MatImaginaryPart_MPIAIJ,
3092        0,
3093        0,
3094 /*109*/0,
3095        MatGetRedundantMatrix_MPIAIJ,
3096        MatGetRowMin_MPIAIJ,
3097        0,
3098        0,
3099 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3100        0,
3101        0,
3102        0,
3103        0,
3104 /*119*/0,
3105        0,
3106        0,
3107        0,
3108        MatGetMultiProcBlock_MPIAIJ,
3109 /*124*/MatFindNonZeroRows_MPIAIJ,
3110        MatGetColumnNorms_MPIAIJ,
3111        MatInvertBlockDiagonal_MPIAIJ,
3112        0,
3113        MatGetSubMatricesParallel_MPIAIJ,
3114 /*129*/0,
3115        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3116        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3117        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3118        0,
3119 /*134*/0,
3120        0,
3121        0,
3122        0,
3123        0
3124 };
3125 
3126 /* ----------------------------------------------------------------------------------------*/
3127 
3128 EXTERN_C_BEGIN
3129 #undef __FUNCT__
3130 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3131 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3132 {
3133   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3134   PetscErrorCode ierr;
3135 
3136   PetscFunctionBegin;
3137   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3138   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3139   PetscFunctionReturn(0);
3140 }
3141 EXTERN_C_END
3142 
3143 EXTERN_C_BEGIN
3144 #undef __FUNCT__
3145 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3146 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3147 {
3148   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3149   PetscErrorCode ierr;
3150 
3151   PetscFunctionBegin;
3152   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3153   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3154   PetscFunctionReturn(0);
3155 }
3156 EXTERN_C_END
3157 
3158 EXTERN_C_BEGIN
3159 #undef __FUNCT__
3160 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3161 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3162 {
3163   Mat_MPIAIJ     *b;
3164   PetscErrorCode ierr;
3165   PetscInt       i;
3166   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3167 
3168   PetscFunctionBegin;
3169   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3170   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3171   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3172   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3173   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3174   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3175 
3176   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3177   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3178   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3179   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3180   if (d_nnz) {
3181     for (i=0; i<B->rmap->n; i++) {
3182       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3183     }
3184   }
3185   if (o_nnz) {
3186     for (i=0; i<B->rmap->n; i++) {
3187       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3188     }
3189   }
3190   b = (Mat_MPIAIJ*)B->data;
3191 
3192   if (!B->preallocated) {
3193     /* Explicitly create 2 MATSEQAIJ matrices. */
3194     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3195     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3196     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3197     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3198     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3199     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3200     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3201     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3202   }
3203 
3204   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3205   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3206   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3207   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3208   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3209   B->preallocated = PETSC_TRUE;
3210   PetscFunctionReturn(0);
3211 }
3212 EXTERN_C_END
3213 
3214 #undef __FUNCT__
3215 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3216 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3217 {
3218   Mat            mat;
3219   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3220   PetscErrorCode ierr;
3221 
3222   PetscFunctionBegin;
3223   *newmat       = 0;
3224   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3225   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3226   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3227   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3228   a    = (Mat_MPIAIJ*)mat->data;
3229 
3230   mat->factortype    = matin->factortype;
3231   mat->rmap->bs      = matin->rmap->bs;
3232   mat->assembled    = PETSC_TRUE;
3233   mat->insertmode   = NOT_SET_VALUES;
3234   mat->preallocated = PETSC_TRUE;
3235 
3236   a->size           = oldmat->size;
3237   a->rank           = oldmat->rank;
3238   a->donotstash     = oldmat->donotstash;
3239   a->roworiented    = oldmat->roworiented;
3240   a->rowindices     = 0;
3241   a->rowvalues      = 0;
3242   a->getrowactive   = PETSC_FALSE;
3243 
3244   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3245   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3246 
3247   if (oldmat->colmap) {
3248 #if defined (PETSC_USE_CTABLE)
3249     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3250 #else
3251     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3252     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3253     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3254 #endif
3255   } else a->colmap = 0;
3256   if (oldmat->garray) {
3257     PetscInt len;
3258     len  = oldmat->B->cmap->n;
3259     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3260     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3261     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3262   } else a->garray = 0;
3263 
3264   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3265   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3266   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3267   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3268   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3269   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3270   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3271   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3272   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3273   *newmat = mat;
3274   PetscFunctionReturn(0);
3275 }
3276 
3277 
3278 
3279 #undef __FUNCT__
3280 #define __FUNCT__ "MatLoad_MPIAIJ"
3281 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3282 {
3283   PetscScalar    *vals,*svals;
3284   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3285   PetscErrorCode ierr;
3286   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3287   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3288   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3289   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3290   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3291   int            fd;
3292 
3293   PetscFunctionBegin;
3294   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3295   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3296   if (!rank) {
3297     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3298     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3299     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3300   }
3301 
3302   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3303 
3304   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3305   M = header[1]; N = header[2];
3306   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3307   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3308   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3309 
3310   /* If global sizes are set, check if they are consistent with that given in the file */
3311   if (sizesset) {
3312     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3313   }
3314   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3315   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3316 
3317   /* determine ownership of all rows */
3318   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3319   else m = newMat->rmap->n; /* Set by user */
3320 
3321   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3322   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3323 
3324   /* First process needs enough room for process with most rows */
3325   if (!rank) {
3326     mmax       = rowners[1];
3327     for (i=2; i<=size; i++) {
3328       mmax = PetscMax(mmax,rowners[i]);
3329     }
3330   } else mmax = m;
3331 
3332   rowners[0] = 0;
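  /* turn the gathered per-process row counts into ownership ranges (prefix sums) */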
3333   for (i=2; i<=size; i++) {
3334     rowners[i] += rowners[i-1];
3335   }
3336   rstart = rowners[rank];
3337   rend   = rowners[rank+1];
3338 
3339   /* distribute row lengths to all processors */
3340   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3341   if (!rank) {
3342     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3343     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3344     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3345     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3346     for (j=0; j<m; j++) {
3347       procsnz[0] += ourlens[j];
3348     }
3349     for (i=1; i<size; i++) {
3350       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3351       /* calculate the number of nonzeros on each processor */
3352       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3353         procsnz[i] += rowlengths[j];
3354       }
3355       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3356     }
3357     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3358   } else {
3359     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3360   }
3361 
3362   if (!rank) {
3363     /* determine max buffer needed and allocate it */
3364     maxnz = 0;
3365     for (i=0; i<size; i++) {
3366       maxnz = PetscMax(maxnz,procsnz[i]);
3367     }
3368     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3369 
3370     /* read in my part of the matrix column indices  */
3371     nz   = procsnz[0];
3372     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3373     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3374 
3375     /* read in everyone else's column indices and ship them off */
3376     for (i=1; i<size; i++) {
3377       nz     = procsnz[i];
3378       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3379       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3380     }
3381     ierr = PetscFree(cols);CHKERRQ(ierr);
3382   } else {
3383     /* determine buffer space needed for message */
3384     nz = 0;
3385     for (i=0; i<m; i++) {
3386       nz += ourlens[i];
3387     }
3388     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3389 
3390     /* receive message of column indices*/
3391     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3392   }
3393 
3394   /* determine column ownership if matrix is not square */
3395   if (N != M) {
3396     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3397     else n = newMat->cmap->n;
3398     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3399     cstart = cend - n;
3400   } else {
3401     cstart = rstart;
3402     cend   = rend;
3403     n      = cend - cstart;
3404   }
3405 
3406   /* loop over local rows, determining number of off diagonal entries */
3407   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3408   jj = 0;
3409   for (i=0; i<m; i++) {
3410     for (j=0; j<ourlens[i]; j++) {
3411       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3412       jj++;
3413     }
3414   }
3415 
3416   for (i=0; i<m; i++) {
3417     ourlens[i] -= offlens[i];
3418   }
3419   if (!sizesset) {
3420     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3421   }
3422   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3423 
3424   for (i=0; i<m; i++) {
3425     ourlens[i] += offlens[i];
3426   }
3427 
3428   if (!rank) {
3429     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3430 
3431     /* read in my part of the matrix numerical values  */
3432     nz   = procsnz[0];
3433     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3434 
3435     /* insert into matrix */
3436     jj      = rstart;
3437     smycols = mycols;
3438     svals   = vals;
3439     for (i=0; i<m; i++) {
3440       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3441       smycols += ourlens[i];
3442       svals   += ourlens[i];
3443       jj++;
3444     }
3445 
3446     /* read in other processors and ship out */
3447     for (i=1; i<size; i++) {
3448       nz     = procsnz[i];
3449       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3450       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3451     }
3452     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3453   } else {
3454     /* receive numeric values */
3455     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3456 
3457     /* receive message of values*/
3458     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3459 
3460     /* insert into matrix */
3461     jj      = rstart;
3462     smycols = mycols;
3463     svals   = vals;
3464     for (i=0; i<m; i++) {
3465       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3466       smycols += ourlens[i];
3467       svals   += ourlens[i];
3468       jj++;
3469     }
3470   }
3471   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3472   ierr = PetscFree(vals);CHKERRQ(ierr);
3473   ierr = PetscFree(mycols);CHKERRQ(ierr);
3474   ierr = PetscFree(rowners);CHKERRQ(ierr);
3475 
3476   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3477   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3478   PetscFunctionReturn(0);
3479 }
3480 
3481 #undef __FUNCT__
3482 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3483 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3484 {
3485   PetscErrorCode ierr;
3486   IS             iscol_local;
3487   PetscInt       csize;
3488 
3489   PetscFunctionBegin;
3490   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3491   if (call == MAT_REUSE_MATRIX) {
3492     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3493     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3494   } else {
3495     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3496   }
3497   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3498   if (call == MAT_INITIAL_MATRIX) {
3499     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3500     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3501   }
3502   PetscFunctionReturn(0);
3503 }
3504 
3505 #undef __FUNCT__
3506 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3507 /*
3508     Not great since it makes two copies of the submatrix, first a sequential SeqAIJ copy
3509   on each process, and then the final result obtained by concatenating those local matrices.
3510   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3511 
3512   Note: This requires a sequential iscol with all indices.
3513 */
3514 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3515 {
3516   PetscErrorCode ierr;
3517   PetscMPIInt    rank,size;
3518   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3519   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3520   Mat            *local,M,Mreuse;
3521   MatScalar      *vwork,*aa;
3522   MPI_Comm       comm = ((PetscObject)mat)->comm;
3523   Mat_SeqAIJ     *aij;
3524 
3525 
3526   PetscFunctionBegin;
3527   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3528   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3529 
3530   if (call ==  MAT_REUSE_MATRIX) {
3531     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3532     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3533     local = &Mreuse;
3534     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3535   } else {
3536     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3537     Mreuse = *local;
3538     ierr   = PetscFree(local);CHKERRQ(ierr);
3539   }
3540 
3541   /*
3542       m - number of local rows
3543       n - number of columns (same on all processors)
3544       rstart - first row in new global matrix generated
3545   */
3546   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3547   if (call == MAT_INITIAL_MATRIX) {
3548     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3549     ii  = aij->i;
3550     jj  = aij->j;
3551 
3552     /*
3553         Determine the number of non-zeros in the diagonal and off-diagonal
3554         portions of the matrix in order to do correct preallocation
3555     */
3556 
3557     /* first get start and end of "diagonal" columns */
3558     if (csize == PETSC_DECIDE) {
3559       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3560       if (mglobal == n) { /* square matrix */
3561 	nlocal = m;
3562       } else {
3563         nlocal = n/size + ((n % size) > rank);
3564       }
3565     } else {
3566       nlocal = csize;
3567     }
3568     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3569     rstart = rend - nlocal;
3570     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3571 
3572     /* next, compute all the lengths */
3573     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3574     olens = dlens + m;
3575     for (i=0; i<m; i++) {
3576       jend = ii[i+1] - ii[i];
3577       olen = 0;
3578       dlen = 0;
3579       for (j=0; j<jend; j++) {
3580         if (*jj < rstart || *jj >= rend) olen++;
3581         else dlen++;
3582         jj++;
3583       }
3584       olens[i] = olen;
3585       dlens[i] = dlen;
3586     }
3587     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3588     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3589     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3590     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3591     ierr = PetscFree(dlens);CHKERRQ(ierr);
3592   } else {
3593     PetscInt ml,nl;
3594 
3595     M = *newmat;
3596     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3597     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3598     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3599     /*
3600          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3601        rather than the slower MatSetValues().
3602     */
3603     M->was_assembled = PETSC_TRUE;
3604     M->assembled     = PETSC_FALSE;
3605   }
3606   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3607   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3608   ii  = aij->i;
3609   jj  = aij->j;
3610   aa  = aij->a;
3611   for (i=0; i<m; i++) {
3612     row   = rstart + i;
3613     nz    = ii[i+1] - ii[i];
3614     cwork = jj;     jj += nz;
3615     vwork = aa;     aa += nz;
3616     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3617   }
3618 
3619   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3620   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3621   *newmat = M;
3622 
3623   /* save submatrix used in processor for next request */
3624   if (call ==  MAT_INITIAL_MATRIX) {
3625     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3626     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3627   }
3628 
3629   PetscFunctionReturn(0);
3630 }
3631 
3632 EXTERN_C_BEGIN
3633 #undef __FUNCT__
3634 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3635 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3636 {
3637   PetscInt       m,cstart, cend,j,nnz,i,d;
3638   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3639   const PetscInt *JJ;
3640   PetscScalar    *values;
3641   PetscErrorCode ierr;
3642 
3643   PetscFunctionBegin;
3644   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3645 
3646   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3647   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3648   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3649   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3650   m      = B->rmap->n;
3651   cstart = B->cmap->rstart;
3652   cend   = B->cmap->rend;
3653   rstart = B->rmap->rstart;
3654 
3655   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3656 
3657 #if defined(PETSC_USE_DEBUG)
3658   for (i=0; i<m; i++) {
3659     nnz     = Ii[i+1]- Ii[i];
3660     JJ      = J + Ii[i];
3661     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3662     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3663     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3664   }
3665 #endif
3666 
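  /* for each local row, count how many column indices fall inside the diagonal block [cstart,cend)
     and how many fall outside, to drive the preallocation below */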
3667   for (i=0; i<m; i++) {
3668     nnz     = Ii[i+1]- Ii[i];
3669     JJ      = J + Ii[i];
3670     nnz_max = PetscMax(nnz_max,nnz);
3671     d       = 0;
3672     for (j=0; j<nnz; j++) {
3673       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3674     }
3675     d_nnz[i] = d;
3676     o_nnz[i] = nnz - d;
3677   }
3678   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3679   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3680 
3681   if (v) values = (PetscScalar*)v;
3682   else {
3683     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3684     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3685   }
3686 
3687   for (i=0; i<m; i++) {
3688     ii   = i + rstart;
3689     nnz  = Ii[i+1]- Ii[i];
3690     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3691   }
3692   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3693   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3694 
3695   if (!v) {
3696     ierr = PetscFree(values);CHKERRQ(ierr);
3697   }
3698   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3699   PetscFunctionReturn(0);
3700 }
3701 EXTERN_C_END
3702 
3703 #undef __FUNCT__
3704 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3705 /*@
3706    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3707    (the default parallel PETSc format).
3708 
3709    Collective on MPI_Comm
3710 
3711    Input Parameters:
3712 +  B - the matrix
3713 .  i - the indices into j for the start of each local row (starts with zero)
3714 .  j - the column indices for each local row (starts with zero)
3715 -  v - optional values in the matrix
3716 
3717    Level: developer
3718 
3719    Notes:
3720        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3721      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3722      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3723 
3724        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3725 
3726        The format used for the sparse matrix input is equivalent to a
3727     row-major ordering, i.e. for the following matrix, the input data expected is
3728     as shown:
3729 
3730         1 0 0
3731         2 0 3     P0
3732        -------
3733         4 5 6     P1
3734 
3735      Process0 [P0]: rows_owned=[0,1]
3736         i =  {0,1,3}  [size = nrow+1  = 2+1]
3737         j =  {0,0,2}  [size = nz = 3]
3738         v =  {1,2,3}  [size = nz = 3]
3739 
3740      Process1 [P1]: rows_owned=[2]
3741         i =  {0,3}    [size = nrow+1  = 1+1]
3742         j =  {0,1,2}  [size = nz = 3]
3743         v =  {4,5,6}  [size = nz = 3]
3744 
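       A possible calling sketch for the example above (assuming B was already created with
     MatCreate(), MatSetSizes(), and MatSetType(MATMPIAIJ) on the two-process communicator):

.vb
     on process 0 (rows 0 and 1):
       PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};
       PetscScalar v0[] = {1,2,3};
       ierr = MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);CHKERRQ(ierr);

     on process 1 (row 2):
       PetscInt    i1[] = {0,3}, j1[] = {0,1,2};
       PetscScalar v1[] = {4,5,6};
       ierr = MatMPIAIJSetPreallocationCSR(B,i1,j1,v1);CHKERRQ(ierr);
.ve
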
3745 .keywords: matrix, aij, compressed row, sparse, parallel
3746 
3747 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3748           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3749 @*/
3750 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3751 {
3752   PetscErrorCode ierr;
3753 
3754   PetscFunctionBegin;
3755   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3756   PetscFunctionReturn(0);
3757 }
3758 
3759 #undef __FUNCT__
3760 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3761 /*@C
3762    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3763    (the default parallel PETSc format).  For good matrix assembly performance
3764    the user should preallocate the matrix storage by setting the parameters
3765    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3766    performance can be increased by more than a factor of 50.
3767 
3768    Collective on MPI_Comm
3769 
3770    Input Parameters:
3771 +  A - the matrix
3772 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3773            (same value is used for all local rows)
3774 .  d_nnz - array containing the number of nonzeros in the various rows of the
3775            DIAGONAL portion of the local submatrix (possibly different for each row)
3776            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3777            The size of this array is equal to the number of local rows, i.e 'm'.
3778            For matrices that will be factored, you must leave room for (and set)
3779            the diagonal entry even if it is zero.
3780 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3781            submatrix (same value is used for all local rows).
3782 -  o_nnz - array containing the number of nonzeros in the various rows of the
3783            OFF-DIAGONAL portion of the local submatrix (possibly different for
3784            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3785            structure. The size of this array is equal to the number
3786            of local rows, i.e 'm'.
3787 
3788    If the *_nnz parameter is given then the *_nz parameter is ignored
3789 
3790    The AIJ format (also called the Yale sparse matrix format or
3791    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3792    storage.  The stored row and column indices begin with zero.
3793    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3794 
3795    The parallel matrix is partitioned such that the first m0 rows belong to
3796    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3797    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3798 
3799    The DIAGONAL portion of the local submatrix of a processor can be defined
3800    as the submatrix which is obtained by extracting the part corresponding to
3801    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3802    first row that belongs to the processor, r2 is the last row belonging to
3803    this processor, and c1-c2 is the range of indices of the local part of a
3804    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3805    common case of a square matrix, the row and column ranges are the same and
3806    the DIAGONAL part is also square. The remaining portion of the local
3807    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3808 
3809    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3810 
3811    You can call MatGetInfo() to get information on how effective the preallocation was;
3812    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3813    You can also run with the option -info and look for messages with the string
3814    malloc in them to see if additional memory allocation was needed.
3815 
3816    Example usage:
3817 
3818    Consider the following 8x8 matrix with 34 non-zero values, that is
3819    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3820    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3821    as follows:
3822 
3823 .vb
3824             1  2  0  |  0  3  0  |  0  4
3825     Proc0   0  5  6  |  7  0  0  |  8  0
3826             9  0 10  | 11  0  0  | 12  0
3827     -------------------------------------
3828            13  0 14  | 15 16 17  |  0  0
3829     Proc1   0 18  0  | 19 20 21  |  0  0
3830             0  0  0  | 22 23  0  | 24  0
3831     -------------------------------------
3832     Proc2  25 26 27  |  0  0 28  | 29  0
3833            30  0  0  | 31 32 33  |  0 34
3834 .ve
3835 
3836    This can be represented as a collection of submatrices as:
3837 
3838 .vb
3839       A B C
3840       D E F
3841       G H I
3842 .ve
3843 
3844    Where the submatrices A,B,C are owned by proc0, D,E,F are
3845    owned by proc1, G,H,I are owned by proc2.
3846 
3847    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3848    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3849    The 'M','N' parameters are 8,8, and have the same values on all procs.
3850 
3851    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3852    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3853    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3854    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3855    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3856    matrix, and [DF] as another SeqAIJ matrix.
3857 
3858    When d_nz, o_nz parameters are specified, d_nz storage elements are
3859    allocated for every row of the local diagonal submatrix, and o_nz
3860    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3861    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3862    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3863    In this case, the values of d_nz,o_nz are:
3864 .vb
3865      proc0 : d_nz = 2, o_nz = 2
3866      proc1 : d_nz = 3, o_nz = 2
3867      proc2 : d_nz = 1, o_nz = 4
3868 .ve
3869    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3870    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3871    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3872    34 values.
3873 
3874    When d_nnz, o_nnz parameters are specified, the storage is specified
3875    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3876    In the above case the values for d_nnz,o_nnz are:
3877 .vb
3878      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3879      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3880      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3881 .ve
3882    Here the space allocated is sum of all the above values i.e 34, and
3883    hence pre-allocation is perfect.
3884 
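   As a sketch, the corresponding call on proc0 of the example above (assuming the matrix was
   already created as a MATMPIAIJ with 3 local rows) could be:
.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   The other processes make the same call with their own d_nnz/o_nnz arrays; the scalar
   d_nz/o_nz arguments are ignored once the arrays are given.
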
3885    Level: intermediate
3886 
3887 .keywords: matrix, aij, compressed row, sparse, parallel
3888 
3889 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3890           MPIAIJ, MatGetInfo()
3891 @*/
3892 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3893 {
3894   PetscErrorCode ierr;
3895 
3896   PetscFunctionBegin;
3897   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3898   PetscValidType(B,1);
3899   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3900   PetscFunctionReturn(0);
3901 }
3902 
3903 #undef __FUNCT__
3904 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3905 /*@
3906      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3907          CSR format the local rows.
3908 
3909    Collective on MPI_Comm
3910 
3911    Input Parameters:
3912 +  comm - MPI communicator
3913 .  m - number of local rows (Cannot be PETSC_DECIDE)
3914 .  n - This value should be the same as the local size used in creating the
3915        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3916        calculated if N is given) For square matrices n is almost always m.
3917 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3918 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3919 .   i - row indices
3920 .   j - column indices
3921 -   a - matrix values
3922 
3923    Output Parameter:
3924 .   mat - the matrix
3925 
3926    Level: intermediate
3927 
3928    Notes:
3929        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3930      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3931      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3932 
3933        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3934 
3935        The format used for the sparse matrix input is equivalent to a
3936     row-major ordering, i.e. for the following matrix, the input data expected is
3937     as shown:
3938 
3939         1 0 0
3940         2 0 3     P0
3941        -------
3942         4 5 6     P1
3943 
3944      Process0 [P0]: rows_owned=[0,1]
3945         i =  {0,1,3}  [size = nrow+1  = 2+1]
3946         j =  {0,0,2}  [size = nz = 3]
3947         v =  {1,2,3}  [size = nz = 3]
3948 
3949      Process1 [P1]: rows_owned=[2]
3950         i =  {0,3}    [size = nrow+1  = 1+1]
3951         j =  {0,1,2}  [size = nz = 3]
3952         v =  {4,5,6}  [size = nz = 3]
3953 
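       A possible calling sketch for the example above (assuming comm has exactly two processes;
     the global sizes could also be given explicitly instead of being determined):

.vb
     on process 0 (rows 0 and 1): m=2 local rows, let PETSc determine M, give N=3
       PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};
       PetscScalar v0[] = {1,2,3};
       ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i0,j0,v0,&mat);CHKERRQ(ierr);

     on process 1 (row 2): m=1 local row
       PetscInt    i1[] = {0,3}, j1[] = {0,1,2};
       PetscScalar v1[] = {4,5,6};
       ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,PETSC_DETERMINE,3,i1,j1,v1,&mat);CHKERRQ(ierr);
.ve
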
3954 .keywords: matrix, aij, compressed row, sparse, parallel
3955 
3956 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3957           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3958 @*/
3959 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3960 {
3961   PetscErrorCode ierr;
3962 
3963   PetscFunctionBegin;
3964   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3965   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3966   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3967   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3968   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3969   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3970   PetscFunctionReturn(0);
3971 }
3972 
3973 #undef __FUNCT__
3974 #define __FUNCT__ "MatCreateAIJ"
3975 /*@C
3976    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3977    (the default parallel PETSc format).  For good matrix assembly performance
3978    the user should preallocate the matrix storage by setting the parameters
3979    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3980    performance can be increased by more than a factor of 50.
3981 
3982    Collective on MPI_Comm
3983 
3984    Input Parameters:
3985 +  comm - MPI communicator
3986 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3987            This value should be the same as the local size used in creating the
3988            y vector for the matrix-vector product y = Ax.
3989 .  n - This value should be the same as the local size used in creating the
3990        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3991        calculated if N is given) For square matrices n is almost always m.
3992 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3993 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3994 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3995            (same value is used for all local rows)
3996 .  d_nnz - array containing the number of nonzeros in the various rows of the
3997            DIAGONAL portion of the local submatrix (possibly different for each row)
3998            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3999            The size of this array is equal to the number of local rows, i.e 'm'.
4000 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4001            submatrix (same value is used for all local rows).
4002 -  o_nnz - array containing the number of nonzeros in the various rows of the
4003            OFF-DIAGONAL portion of the local submatrix (possibly different for
4004            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4005            structure. The size of this array is equal to the number
4006            of local rows, i.e 'm'.
4007 
4008    Output Parameter:
4009 .  A - the matrix
4010 
4011    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4012    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4013    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4014 
4015    Notes:
4016    If the *_nnz parameter is given then the *_nz parameter is ignored
4017 
4018    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4019    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4020    storage requirements for this matrix.
4021 
4022    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4023    processor then it must be used on all processors that share the object for
4024    that argument.
4025 
4026    The user MUST specify either the local or global matrix dimensions
4027    (possibly both).
4028 
4029    The parallel matrix is partitioned across processors such that the
4030    first m0 rows belong to process 0, the next m1 rows belong to
4031    process 1, the next m2 rows belong to process 2, etc., where
4032    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4033    values corresponding to an [m x N] submatrix.
4034 
4035    The columns are logically partitioned with the n0 columns belonging
4036    to the 0th partition, the next n1 columns belonging to the next
4037    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4038 
4039    The DIAGONAL portion of the local submatrix on any given processor
4040    is the submatrix corresponding to the rows and columns m,n
4041    owned by the given processor, i.e. the diagonal matrix on
4042    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4043    etc. The remaining portion of the local submatrix [m x (N-n)]
4044    constitutes the OFF-DIAGONAL portion. The example below better
4045    illustrates this concept.
4046 
4047    For a square global matrix we define each processor's diagonal portion
4048    to be its local rows and the corresponding columns (a square submatrix);
4049    each processor's off-diagonal portion encompasses the remainder of the
4050    local matrix (a rectangular submatrix).
4051 
4052    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4053 
4054    When calling this routine with a single process communicator, a matrix of
4055    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4056    type of communicator, use the construction mechanism:
4057      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4058 
4059    By default, this format uses inodes (identical nodes) when possible.
4060    We search for consecutive rows with the same nonzero structure, thereby
4061    reusing matrix information to achieve increased efficiency.
4062 
4063    Options Database Keys:
4064 +  -mat_no_inode  - Do not use inodes
4065 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4066 -  -mat_aij_oneindex - Internally use indexing starting at 1
4067         rather than 0.  Note that when calling MatSetValues(),
4068         the user still MUST index entries starting at 0!
4069 
4070 
4071    Example usage:
4072 
4073    Consider the following 8x8 matrix with 34 non-zero values, which is
4074    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4075    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4076    as follows:
4077 
4078 .vb
4079             1  2  0  |  0  3  0  |  0  4
4080     Proc0   0  5  6  |  7  0  0  |  8  0
4081             9  0 10  | 11  0  0  | 12  0
4082     -------------------------------------
4083            13  0 14  | 15 16 17  |  0  0
4084     Proc1   0 18  0  | 19 20 21  |  0  0
4085             0  0  0  | 22 23  0  | 24  0
4086     -------------------------------------
4087     Proc2  25 26 27  |  0  0 28  | 29  0
4088            30  0  0  | 31 32 33  |  0 34
4089 .ve
4090 
4091    This can be represented as a collection of submatrices as:
4092 
4093 .vb
4094       A B C
4095       D E F
4096       G H I
4097 .ve
4098 
4099    Where the submatrices A,B,C are owned by proc0, D,E,F are
4100    owned by proc1, G,H,I are owned by proc2.
4101 
4102    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4103    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4104    The 'M','N' parameters are 8,8, and have the same values on all procs.
4105 
4106    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4107    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4108    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4109    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4110    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4111    matrix, and [DF] as another SeqAIJ matrix.
4112 
4113    When d_nz, o_nz parameters are specified, d_nz storage elements are
4114    allocated for every row of the local diagonal submatrix, and o_nz
4115    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4116    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4117    local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4118    In this case, the values of d_nz,o_nz are:
4119 .vb
4120      proc0 : d_nz = 2, o_nz = 2
4121      proc1 : d_nz = 3, o_nz = 2
4122      proc2 : d_nz = 1, o_nz = 4
4123 .ve
4124    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4125    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4126    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4127    34 values.
4128 
4129    When d_nnz, o_nnz parameters are specified, the storage is specified
4130    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4131    In the above case the values for d_nnz,o_nnz are:
4132 .vb
4133      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4134      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4135      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4136 .ve
4137    Here the space allocated is the sum of all the above values, i.e. 34, and
4138    hence the preallocation is perfect.
4139 
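   As a sketch only, the matrix of the example could be created on proc1 with
   the per-row preallocation listed above (the other processes make the same
   call with their own local sizes and arrays; the communicator name comm is
   illustrative):

.vb
     PetscInt d_nnz[3] = {3,3,2};
     PetscInt o_nnz[3] = {2,1,1};
     Mat      A;

     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
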
4140    Level: intermediate
4141 
4142 .keywords: matrix, aij, compressed row, sparse, parallel
4143 
4144 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4145           MPIAIJ, MatCreateMPIAIJWithArrays()
4146 @*/
4147 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4148 {
4149   PetscErrorCode ierr;
4150   PetscMPIInt    size;
4151 
4152   PetscFunctionBegin;
4153   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4154   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4155   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4156   if (size > 1) {
4157     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4158     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4159   } else {
4160     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4161     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4162   }
4163   PetscFunctionReturn(0);
4164 }
4165 
4166 #undef __FUNCT__
4167 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4168 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4169 {
4170   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4171 
4172   PetscFunctionBegin;
4173   *Ad     = a->A;
4174   *Ao     = a->B;
4175   *colmap = a->garray;
4176   PetscFunctionReturn(0);
4177 }
4178 
4179 #undef __FUNCT__
4180 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4181 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4182 {
4183   PetscErrorCode ierr;
4184   PetscInt       i;
4185   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4186 
4187   PetscFunctionBegin;
4188   if (coloring->ctype == IS_COLORING_GLOBAL) {
4189     ISColoringValue *allcolors,*colors;
4190     ISColoring      ocoloring;
4191 
4192     /* set coloring for diagonal portion */
4193     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4194 
4195     /* set coloring for off-diagonal portion */
4196     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4197     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4198     for (i=0; i<a->B->cmap->n; i++) {
4199       colors[i] = allcolors[a->garray[i]];
4200     }
4201     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4202     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4203     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4204     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4205   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4206     ISColoringValue *colors;
4207     PetscInt        *larray;
4208     ISColoring      ocoloring;
4209 
4210     /* set coloring for diagonal portion */
4211     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4212     for (i=0; i<a->A->cmap->n; i++) {
4213       larray[i] = i + A->cmap->rstart;
4214     }
4215     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4216     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4217     for (i=0; i<a->A->cmap->n; i++) {
4218       colors[i] = coloring->colors[larray[i]];
4219     }
4220     ierr = PetscFree(larray);CHKERRQ(ierr);
4221     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4222     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4223     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4224 
4225     /* set coloring for off-diagonal portion */
4226     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4227     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4228     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4229     for (i=0; i<a->B->cmap->n; i++) {
4230       colors[i] = coloring->colors[larray[i]];
4231     }
4232     ierr = PetscFree(larray);CHKERRQ(ierr);
4233     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4234     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4235     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4236   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4237 
4238   PetscFunctionReturn(0);
4239 }
4240 
4241 #if defined(PETSC_HAVE_ADIC)
4242 #undef __FUNCT__
4243 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4244 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4245 {
4246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4247   PetscErrorCode ierr;
4248 
4249   PetscFunctionBegin;
4250   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4251   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4252   PetscFunctionReturn(0);
4253 }
4254 #endif
4255 
4256 #undef __FUNCT__
4257 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4258 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4259 {
4260   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4261   PetscErrorCode ierr;
4262 
4263   PetscFunctionBegin;
4264   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4265   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4266   PetscFunctionReturn(0);
4267 }
4268 
4269 #undef __FUNCT__
4270 #define __FUNCT__ "MatMergeSymbolic"
4271 PetscErrorCode  MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4272 {
4273   PetscErrorCode ierr;
4274   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum;
4275   PetscInt       *indx;
4276 
4277   PetscFunctionBegin;
4278   /* This routine will ONLY return MPIAIJ type matrix */
4279   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4280   if (n == PETSC_DECIDE){
4281     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4282   }
4283   /* Check sum(n) = N */
4284   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPIU_SUM,comm);CHKERRQ(ierr);
4285   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4286 
4287   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4288   rstart -= m;
4289 
4290   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4291   for (i=0;i<m;i++) {
4292     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4293     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4294     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4295   }
4296 
4297   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4298   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4299   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4300   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4301   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4302   PetscFunctionReturn(0);
4303 }
4304 
4305 #undef __FUNCT__
4306 #define __FUNCT__ "MatMergeNumeric"
4307 PetscErrorCode  MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4308 {
4309   PetscErrorCode ierr;
4310   PetscInt       m,N,i,rstart,nnz,Ii;
4311   PetscInt       *indx;
4312   PetscScalar    *values;
4313 
4314   PetscFunctionBegin;
4315   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4316   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4317   for (i=0;i<m;i++) {
4318     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4319     Ii    = i + rstart;
4320     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4321     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4322   }
4323   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4324   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4325   PetscFunctionReturn(0);
4326 }
4327 
4328 #undef __FUNCT__
4329 #define __FUNCT__ "MatMerge"
4330 /*@
4331       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4332                  matrices from each processor
4333 
4334     Collective on MPI_Comm
4335 
4336    Input Parameters:
4337 +    comm - the communicator the parallel matrix will live on
4338 .    inmat - the input sequential matrix
4339 .    n - number of local columns (or PETSC_DECIDE)
4340 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4341 
4342    Output Parameter:
4343 .    outmat - the parallel matrix generated
4344 
4345     Level: advanced
4346 
4347    Notes: The number of columns of the matrix on EACH processor MUST be the same.
4348 
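   A usage sketch (the names Aseq and Ampi are illustrative; each process
   supplies its own sequential matrix, all with the same number of columns):

.vb
     Mat Aseq,Ampi;
     /* ... assemble the local sequential matrix Aseq ... */
     ierr = MatMerge(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
.ve
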
4349 @*/
4350 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4351 {
4352   PetscErrorCode ierr;
4353 
4354   PetscFunctionBegin;
4355   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4356   if (scall == MAT_INITIAL_MATRIX){
4357     ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4358   }
4359   ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4360   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4361   PetscFunctionReturn(0);
4362 }
4363 
4364 #undef __FUNCT__
4365 #define __FUNCT__ "MatFileSplit"
4366 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4367 {
4368   PetscErrorCode    ierr;
4369   PetscMPIInt       rank;
4370   PetscInt          m,N,i,rstart,nnz;
4371   size_t            len;
4372   const PetscInt    *indx;
4373   PetscViewer       out;
4374   char              *name;
4375   Mat               B;
4376   const PetscScalar *values;
4377 
4378   PetscFunctionBegin;
4379   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4380   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4381   /* Should this be the type of the diagonal block of A? */
4382   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4383   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4384   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4385   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4386   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4387   for (i=0;i<m;i++) {
4388     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4389     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4390     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4391   }
4392   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4393   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4394 
4395   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4396   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4397   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4398   sprintf(name,"%s.%d",outfile,rank);
4399   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4400   ierr = PetscFree(name);CHKERRQ(ierr);
4401   ierr = MatView(B,out);CHKERRQ(ierr);
4402   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4403   ierr = MatDestroy(&B);CHKERRQ(ierr);
4404   PetscFunctionReturn(0);
4405 }
4406 
4407 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4408 #undef __FUNCT__
4409 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4410 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4411 {
4412   PetscErrorCode       ierr;
4413   Mat_Merge_SeqsToMPI  *merge;
4414   PetscContainer       container;
4415 
4416   PetscFunctionBegin;
4417   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4418   if (container) {
4419     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4420     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4421     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4422     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4423     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4424     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4425     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4426     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4427     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4428     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4429     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4430     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4431     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4432     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4433     ierr = PetscFree(merge);CHKERRQ(ierr);
4434     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4435   }
4436   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4437   PetscFunctionReturn(0);
4438 }
4439 
4440 #include <../src/mat/utils/freespace.h>
4441 #include <petscbt.h>
4442 
4443 #undef __FUNCT__
4444 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4445 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4446 {
4447   PetscErrorCode       ierr;
4448   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4449   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4450   PetscMPIInt          size,rank,taga,*len_s;
4451   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4452   PetscInt             proc,m;
4453   PetscInt             **buf_ri,**buf_rj;
4454   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4455   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4456   MPI_Request          *s_waits,*r_waits;
4457   MPI_Status           *status;
4458   MatScalar            *aa=a->a;
4459   MatScalar            **abuf_r,*ba_i;
4460   Mat_Merge_SeqsToMPI  *merge;
4461   PetscContainer       container;
4462 
4463   PetscFunctionBegin;
4464   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4465 
4466   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4467   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4468 
4469   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4470   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4471 
4472   bi     = merge->bi;
4473   bj     = merge->bj;
4474   buf_ri = merge->buf_ri;
4475   buf_rj = merge->buf_rj;
4476 
4477   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4478   owners = merge->rowmap->range;
4479   len_s  = merge->len_s;
4480 
4481   /* send and recv matrix values */
4482   /*-----------------------------*/
4483   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4484   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4485 
4486   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4487   for (proc=0,k=0; proc<size; proc++){
4488     if (!len_s[proc]) continue;
4489     i = owners[proc];
4490     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4491     k++;
4492   }
4493 
4494   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4495   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4496   ierr = PetscFree(status);CHKERRQ(ierr);
4497 
4498   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4499   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4500 
4501   /* insert mat values of mpimat */
4502   /*----------------------------*/
4503   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4504   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4505 
4506   for (k=0; k<merge->nrecv; k++){
4507     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4508     nrows = *(buf_ri_k[k]);
4509     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4510     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4511   }
4512 
4513   /* set values of ba */
4514   m = merge->rowmap->n;
4515   for (i=0; i<m; i++) {
4516     arow = owners[rank] + i;
4517     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4518     bnzi = bi[i+1] - bi[i];
4519     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4520 
4521     /* add local non-zero vals of this proc's seqmat into ba */
4522     anzi = ai[arow+1] - ai[arow];
4523     aj   = a->j + ai[arow];
4524     aa   = a->a + ai[arow];
4525     nextaj = 0;
4526     for (j=0; nextaj<anzi; j++){
4527       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4528         ba_i[j] += aa[nextaj++];
4529       }
4530     }
4531 
4532     /* add received vals into ba */
4533     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4534       /* i-th row */
4535       if (i == *nextrow[k]) {
4536         anzi = *(nextai[k]+1) - *nextai[k];
4537         aj   = buf_rj[k] + *(nextai[k]);
4538         aa   = abuf_r[k] + *(nextai[k]);
4539         nextaj = 0;
4540         for (j=0; nextaj<anzi; j++){
4541           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4542             ba_i[j] += aa[nextaj++];
4543           }
4544         }
4545         nextrow[k]++; nextai[k]++;
4546       }
4547     }
4548     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4549   }
4550   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4551   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4552 
4553   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4554   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4555   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4556   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4557   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4558   PetscFunctionReturn(0);
4559 }
4560 
4561 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4562 
4563 #undef __FUNCT__
4564 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4565 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4566 {
4567   PetscErrorCode       ierr;
4568   Mat                  B_mpi;
4569   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4570   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4571   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4572   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4573   PetscInt             len,proc,*dnz,*onz;
4574   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4575   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4576   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4577   MPI_Status           *status;
4578   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4579   PetscBT              lnkbt;
4580   Mat_Merge_SeqsToMPI  *merge;
4581   PetscContainer       container;
4582 
4583   PetscFunctionBegin;
4584   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4585 
4586   /* make sure it is a PETSc comm */
4587   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4588   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4589   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4590 
4591   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4592   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4593 
4594   /* determine row ownership */
4595   /*---------------------------------------------------------*/
4596   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4597   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4598   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4599   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4600   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4601   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4602   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4603 
4604   m      = merge->rowmap->n;
4605   M      = merge->rowmap->N;
4606   owners = merge->rowmap->range;
4607 
4608   /* determine the number of messages to send, their lengths */
4609   /*---------------------------------------------------------*/
4610   len_s  = merge->len_s;
4611 
4612   len = 0;  /* length of buf_si[] */
4613   merge->nsend = 0;
4614   for (proc=0; proc<size; proc++){
4615     len_si[proc] = 0;
4616     if (proc == rank){
4617       len_s[proc] = 0;
4618     } else {
4619       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4620       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4621     }
4622     if (len_s[proc]) {
4623       merge->nsend++;
4624       nrows = 0;
4625       for (i=owners[proc]; i<owners[proc+1]; i++){
4626         if (ai[i+1] > ai[i]) nrows++;
4627       }
4628       len_si[proc] = 2*(nrows+1);
4629       len += len_si[proc];
4630     }
4631   }
4632 
4633   /* determine the number and length of messages to receive for ij-structure */
4634   /*-------------------------------------------------------------------------*/
4635   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4636   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4637 
4638   /* post the Irecv of j-structure */
4639   /*-------------------------------*/
4640   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4641   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4642 
4643   /* post the Isend of j-structure */
4644   /*--------------------------------*/
4645   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4646 
4647   for (proc=0, k=0; proc<size; proc++){
4648     if (!len_s[proc]) continue;
4649     i = owners[proc];
4650     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4651     k++;
4652   }
4653 
4654   /* receives and sends of j-structure are complete */
4655   /*------------------------------------------------*/
4656   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4657   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4658 
4659   /* send and recv i-structure */
4660   /*---------------------------*/
4661   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4662   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4663 
4664   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4665   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4666   for (proc=0,k=0; proc<size; proc++){
4667     if (!len_s[proc]) continue;
4668     /* form outgoing message for i-structure:
4669          buf_si[0]:                 nrows to be sent
4670                [1:nrows]:           row index (global)
4671                [nrows+1:2*nrows+1]: i-structure index
4672     */
4673     /*-------------------------------------------*/
4674     nrows = len_si[proc]/2 - 1;
4675     buf_si_i    = buf_si + nrows+1;
4676     buf_si[0]   = nrows;
4677     buf_si_i[0] = 0;
4678     nrows = 0;
4679     for (i=owners[proc]; i<owners[proc+1]; i++){
4680       anzi = ai[i+1] - ai[i];
4681       if (anzi) {
4682         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4683         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4684         nrows++;
4685       }
4686     }
4687     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4688     k++;
4689     buf_si += len_si[proc];
4690   }
4691 
4692   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4693   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4694 
4695   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4696   for (i=0; i<merge->nrecv; i++){
4697     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4698   }
4699 
4700   ierr = PetscFree(len_si);CHKERRQ(ierr);
4701   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4702   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4703   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4704   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4705   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4706   ierr = PetscFree(status);CHKERRQ(ierr);
4707 
4708   /* compute a local seq matrix in each processor */
4709   /*----------------------------------------------*/
4710   /* allocate bi array and free space for accumulating nonzero column info */
4711   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4712   bi[0] = 0;
4713 
4714   /* create and initialize a linked list */
4715   nlnk = N+1;
4716   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4717 
4718   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4719   len = 0;
4720   len  = ai[owners[rank+1]] - ai[owners[rank]];
4721   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4722   current_space = free_space;
4723 
4724   /* determine symbolic info for each local row */
4725   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4726 
4727   for (k=0; k<merge->nrecv; k++){
4728     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4729     nrows = *buf_ri_k[k];
4730     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4731     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4732   }
4733 
4734   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4735   len = 0;
4736   for (i=0;i<m;i++) {
4737     bnzi   = 0;
4738     /* add local non-zero cols of this proc's seqmat into lnk */
4739     arow   = owners[rank] + i;
4740     anzi   = ai[arow+1] - ai[arow];
4741     aj     = a->j + ai[arow];
4742     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4743     bnzi += nlnk;
4744     /* add received col data into lnk */
4745     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4746       if (i == *nextrow[k]) { /* i-th row */
4747         anzi = *(nextai[k]+1) - *nextai[k];
4748         aj   = buf_rj[k] + *nextai[k];
4749         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4750         bnzi += nlnk;
4751         nextrow[k]++; nextai[k]++;
4752       }
4753     }
4754     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4755 
4756     /* if free space is not available, make more free space */
4757     if (current_space->local_remaining<bnzi) {
4758       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4759       nspacedouble++;
4760     }
4761     /* copy data into free space, then initialize lnk */
4762     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4763     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4764 
4765     current_space->array           += bnzi;
4766     current_space->local_used      += bnzi;
4767     current_space->local_remaining -= bnzi;
4768 
4769     bi[i+1] = bi[i] + bnzi;
4770   }
4771 
4772   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4773 
4774   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4775   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4776   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4777 
4778   /* create symbolic parallel matrix B_mpi */
4779   /*---------------------------------------*/
4780   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4781   if (n==PETSC_DECIDE) {
4782     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4783   } else {
4784     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4785   }
4786   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4787   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4788   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4789   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4790 
4791   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4792   B_mpi->assembled     = PETSC_FALSE;
4793   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4794   merge->bi            = bi;
4795   merge->bj            = bj;
4796   merge->buf_ri        = buf_ri;
4797   merge->buf_rj        = buf_rj;
4798   merge->coi           = PETSC_NULL;
4799   merge->coj           = PETSC_NULL;
4800   merge->owners_co     = PETSC_NULL;
4801 
4802   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4803 
4804   /* attach the supporting struct to B_mpi for reuse */
4805   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4806   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4807   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4808   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4809   *mpimat = B_mpi;
4810 
4811   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4812   PetscFunctionReturn(0);
4813 }
4814 
4815 #undef __FUNCT__
4816 #define __FUNCT__ "MatMerge_SeqsToMPI"
4817 /*@C
4818       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4819                  matrices from each processor
4820 
4821     Collective on MPI_Comm
4822 
4823    Input Parameters:
4824 +    comm - the communicator the parallel matrix will live on
4825 .    seqmat - the input sequential matrix
4826 .    m - number of local rows (or PETSC_DECIDE)
4827 .    n - number of local columns (or PETSC_DECIDE)
4828 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4829 
4830    Output Parameter:
4831 .    mpimat - the parallel matrix generated
4832 
4833     Level: advanced
4834 
4835    Notes:
4836      The dimensions of the sequential matrix on each processor MUST be the same.
4837      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4838      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
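
   A usage sketch (the names Aseq and Ampi are illustrative; every process
   supplies a sequential matrix of identical dimensions whose entries are added):

.vb
     Mat Aseq,Ampi;
     /* ... assemble Aseq with the same dimensions on every process ... */
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
.ve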
4839 @*/
4840 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4841 {
4842   PetscErrorCode   ierr;
4843   PetscMPIInt     size;
4844 
4845   PetscFunctionBegin;
4846   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4847   if (size == 1){
4848      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4849      if (scall == MAT_INITIAL_MATRIX){
4850        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4851      } else {
4852        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4853      }
4854      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4855      PetscFunctionReturn(0);
4856   }
4857   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4858   if (scall == MAT_INITIAL_MATRIX){
4859     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4860   }
4861   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4862   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4863   PetscFunctionReturn(0);
4864 }
4865 
4866 #undef __FUNCT__
4867 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4868 /*@
4869      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4870           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4871           with MatGetSize()
4872 
4873     Not Collective
4874 
4875    Input Parameters:
4876 +    A - the matrix
4877 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4878 
4879    Output Parameter:
4880 .    A_loc - the local sequential matrix generated
4881 
4882     Level: developer
4883 
4884 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4885 
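   A usage sketch (A is an assembled MPIAIJ matrix; the names are illustrative):

.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... later, refill the same storage after the values of A change ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
.ve
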
4886 @*/
4887 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4888 {
4889   PetscErrorCode  ierr;
4890   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4891   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4892   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4893   MatScalar       *aa=a->a,*ba=b->a,*cam;
4894   PetscScalar     *ca;
4895   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4896   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4897   PetscBool       match;
4898 
4899   PetscFunctionBegin;
4900   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4901   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4902   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4903   if (scall == MAT_INITIAL_MATRIX){
4904     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4905     ci[0] = 0;
4906     for (i=0; i<am; i++){
4907       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4908     }
4909     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4910     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4911     k = 0;
4912     for (i=0; i<am; i++) {
4913       ncols_o = bi[i+1] - bi[i];
4914       ncols_d = ai[i+1] - ai[i];
4915       /* off-diagonal portion of A */
4916       for (jo=0; jo<ncols_o; jo++) {
4917         col = cmap[*bj];
4918         if (col >= cstart) break;
4919         cj[k]   = col; bj++;
4920         ca[k++] = *ba++;
4921       }
4922       /* diagonal portion of A */
4923       for (j=0; j<ncols_d; j++) {
4924         cj[k]   = cstart + *aj++;
4925         ca[k++] = *aa++;
4926       }
4927       /* off-diagonal portion of A */
4928       for (j=jo; j<ncols_o; j++) {
4929         cj[k]   = cmap[*bj++];
4930         ca[k++] = *ba++;
4931       }
4932     }
4933     /* put together the new matrix */
4934     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4935     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4936     /* Since these are PETSc arrays, change flags to free them as necessary. */
4937     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4938     mat->free_a  = PETSC_TRUE;
4939     mat->free_ij = PETSC_TRUE;
4940     mat->nonew   = 0;
4941   } else if (scall == MAT_REUSE_MATRIX){
4942     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4943     ci = mat->i; cj = mat->j; cam = mat->a;
4944     for (i=0; i<am; i++) {
4945       /* off-diagonal portion of A */
4946       ncols_o = bi[i+1] - bi[i];
4947       for (jo=0; jo<ncols_o; jo++) {
4948         col = cmap[*bj];
4949         if (col >= cstart) break;
4950         *cam++ = *ba++; bj++;
4951       }
4952       /* diagonal portion of A */
4953       ncols_d = ai[i+1] - ai[i];
4954       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4955       /* off-diagonal portion of A */
4956       for (j=jo; j<ncols_o; j++) {
4957         *cam++ = *ba++; bj++;
4958       }
4959     }
4960   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4961   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4962   PetscFunctionReturn(0);
4963 }
4964 
4965 #undef __FUNCT__
4966 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4967 /*@C
4968      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4969 
4970     Not Collective
4971 
4972    Input Parameters:
4973 +    A - the matrix
4974 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4975 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4976 
4977    Output Parameter:
4978 .    A_loc - the local sequential matrix generated
4979 
4980     Level: developer
4981 
4982 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4983 
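   A usage sketch extracting all local rows and the nonzero columns (passing
   PETSC_NULL lets the routine build the index sets itself; the names are
   illustrative):

.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
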
4984 @*/
4985 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4986 {
4987   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4988   PetscErrorCode    ierr;
4989   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4990   IS                isrowa,iscola;
4991   Mat               *aloc;
4992   PetscBool       match;
4993 
4994   PetscFunctionBegin;
4995   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4996   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4997   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4998   if (!row){
4999     start = A->rmap->rstart; end = A->rmap->rend;
5000     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5001   } else {
5002     isrowa = *row;
5003   }
5004   if (!col){
5005     start = A->cmap->rstart;
5006     cmap  = a->garray;
5007     nzA   = a->A->cmap->n;
5008     nzB   = a->B->cmap->n;
5009     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5010     ncols = 0;
5011     for (i=0; i<nzB; i++) {
5012       if (cmap[i] < start) idx[ncols++] = cmap[i];
5013       else break;
5014     }
5015     imark = i;
5016     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5017     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5018     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5019   } else {
5020     iscola = *col;
5021   }
5022   if (scall != MAT_INITIAL_MATRIX){
5023     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5024     aloc[0] = *A_loc;
5025   }
5026   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5027   *A_loc = aloc[0];
5028   ierr = PetscFree(aloc);CHKERRQ(ierr);
5029   if (!row){
5030     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5031   }
5032   if (!col){
5033     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5034   }
5035   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5036   PetscFunctionReturn(0);
5037 }
5038 
5039 #undef __FUNCT__
5040 #define __FUNCT__ "MatGetBrowsOfAcols"
5041 /*@C
5042     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5043 
5044     Collective on Mat
5045 
5046    Input Parameters:
5047 +    A,B - the matrices in mpiaij format
5048 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5049 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5050 
5051    Output Parameter:
5052 +    rowb, colb - index sets of rows and columns of B to extract
5053 -    B_seq - the sequential matrix generated
5054 
5055     Level: developer
5056 
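   A usage sketch (rowb and colb are created on the first call and must be
   passed back unchanged with MAT_REUSE_MATRIX; the names are illustrative):

.vb
     IS  rowb,colb;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     /* ... after the numerical values of B change ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
.ve
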
5057 @*/
5058 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5059 {
5060   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5061   PetscErrorCode    ierr;
5062   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5063   IS                isrowb,iscolb;
5064   Mat               *bseq=PETSC_NULL;
5065 
5066   PetscFunctionBegin;
5067   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5068     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5069   }
5070   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5071 
5072   if (scall == MAT_INITIAL_MATRIX){
5073     start = A->cmap->rstart;
5074     cmap  = a->garray;
5075     nzA   = a->A->cmap->n;
5076     nzB   = a->B->cmap->n;
5077     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5078     ncols = 0;
5079     for (i=0; i<nzB; i++) {  /* row < local row index */
5080       if (cmap[i] < start) idx[ncols++] = cmap[i];
5081       else break;
5082     }
5083     imark = i;
5084     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5085     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5086     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5087     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5088   } else {
5089     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5090     isrowb = *rowb; iscolb = *colb;
5091     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5092     bseq[0] = *B_seq;
5093   }
5094   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5095   *B_seq = bseq[0];
5096   ierr = PetscFree(bseq);CHKERRQ(ierr);
5097   if (!rowb){
5098     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5099   } else {
5100     *rowb = isrowb;
5101   }
5102   if (!colb){
5103     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5104   } else {
5105     *colb = iscolb;
5106   }
5107   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5108   PetscFunctionReturn(0);
5109 }
5110 
5111 #undef __FUNCT__
5112 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5113 /*
5114     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5115     of the OFF-DIAGONAL portion of local A
5116 
5117     Collective on Mat
5118 
5119    Input Parameters:
5120 +    A,B - the matrices in mpiaij format
5121 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5122 
5123    Output Parameter:
5124 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5125 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5126 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5127 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5128 
5129     Level: developer
5130 
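   A hypothetical call pattern, sketched: pass MAT_INITIAL_MATRIX first and keep the
   returned arrays, then pass them back with MAT_REUSE_MATRIX when only the values of
   B have changed:

     PetscInt  *startsj_s=PETSC_NULL,*startsj_r=PETSC_NULL;
     MatScalar *bufa=PETSC_NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
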
5131 */
5132 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5133 {
5134   VecScatter_MPI_General *gen_to,*gen_from;
5135   PetscErrorCode         ierr;
5136   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5137   Mat_SeqAIJ             *b_oth;
5138   VecScatter             ctx=a->Mvctx;
5139   MPI_Comm               comm=((PetscObject)ctx)->comm;
5140   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5141   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5142   PetscScalar            *rvalues,*svalues;
5143   MatScalar              *b_otha,*bufa,*bufA;
5144   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5145   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5146   MPI_Status             *sstatus,rstatus;
5147   PetscMPIInt            jj;
5148   PetscInt               *cols,sbs,rbs;
5149   PetscScalar            *vals;
5150 
5151   PetscFunctionBegin;
5152   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5153     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5154   }
5155   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5156   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5157 
5158   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5159   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5160   rvalues  = gen_from->values; /* holds the length of receiving row */
5161   svalues  = gen_to->values;   /* holds the length of sending row */
5162   nrecvs   = gen_from->n;
5163   nsends   = gen_to->n;
5164 
5165   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5166   srow     = gen_to->indices;   /* local row index to be sent */
5167   sstarts  = gen_to->starts;
5168   sprocs   = gen_to->procs;
5169   sstatus  = gen_to->sstatus;
5170   sbs      = gen_to->bs;
5171   rstarts  = gen_from->starts;
5172   rprocs   = gen_from->procs;
5173   rbs      = gen_from->bs;
5174 
5175   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5176   if (scall == MAT_INITIAL_MATRIX){
5177     /* i-array */
5178     /*---------*/
5179     /*  post receives */
5180     for (i=0; i<nrecvs; i++){
5181       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5182       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5183       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5184     }
5185 
5186     /* pack the outgoing message */
5187     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5188     sstartsj[0] = 0;  rstartsj[0] = 0;
5189     len = 0; /* total length of j or a array to be sent */
5190     k = 0;
5191     for (i=0; i<nsends; i++){
5192       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5193       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5194       for (j=0; j<nrows; j++) {
5195         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5196         for (l=0; l<sbs; l++){
5197           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5198           rowlen[j*sbs+l] = ncols;
5199           len += ncols;
5200           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5201         }
5202         k++;
5203       }
5204       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5205       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5206     }
5207     /* recvs and sends of i-array are completed */
5208     i = nrecvs;
5209     while (i--) {
5210       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5211     }
5212     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5213 
5214     /* allocate buffers for sending j and a arrays */
5215     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5216     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5217 
5218     /* create i-array of B_oth */
5219     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5220     b_othi[0] = 0;
5221     len = 0; /* total length of j or a array to be received */
5222     k = 0;
5223     for (i=0; i<nrecvs; i++){
5224       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5225       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5226       for (j=0; j<nrows; j++) {
5227         b_othi[k+1] = b_othi[k] + rowlen[j];
5228         len += rowlen[j]; k++;
5229       }
5230       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5231     }
5232 
5233     /* allocate space for j and a arrays of B_oth */
5234     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5235     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5236 
5237     /* j-array */
5238     /*---------*/
5239     /*  post receives of j-array */
5240     for (i=0; i<nrecvs; i++){
5241       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5242       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5243     }
5244 
5245     /* pack the outgoing message j-array */
5246     k = 0;
5247     for (i=0; i<nsends; i++){
5248       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5249       bufJ = bufj+sstartsj[i];
5250       for (j=0; j<nrows; j++) {
5251         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5252         for (ll=0; ll<sbs; ll++){
5253           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5254           for (l=0; l<ncols; l++){
5255             *bufJ++ = cols[l];
5256           }
5257           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5258         }
5259       }
5260       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5261     }
5262 
5263     /* recvs and sends of j-array are completed */
5264     i = nrecvs;
5265     while (i--) {
5266       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5267     }
5268     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5269   } else if (scall == MAT_REUSE_MATRIX){
5270     sstartsj = *startsj_s;
5271     rstartsj = *startsj_r;
5272     bufa     = *bufa_ptr;
5273     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5274     b_otha   = b_oth->a;
5275   } else {
5276     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5277   }
5278 
5279   /* a-array */
5280   /*---------*/
5281   /*  post receives of a-array */
5282   for (i=0; i<nrecvs; i++){
5283     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5284     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5285   }
5286 
5287   /* pack the outgoing message a-array */
5288   k = 0;
5289   for (i=0; i<nsends; i++){
5290     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5291     bufA = bufa+sstartsj[i];
5292     for (j=0; j<nrows; j++) {
5293       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5294       for (ll=0; ll<sbs; ll++){
5295         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5296         for (l=0; l<ncols; l++){
5297           *bufA++ = vals[l];
5298         }
5299         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5300       }
5301     }
5302     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5303   }
5304   /* recvs and sends of a-array are completed */
5305   i = nrecvs;
5306   while (i--) {
5307     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5308   }
5309   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5310   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5311 
5312   if (scall == MAT_INITIAL_MATRIX){
5313     /* put together the new matrix */
5314     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5315 
5316     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5317     /* Since these are PETSc arrays, change flags to free them as necessary. */
5318     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5319     b_oth->free_a  = PETSC_TRUE;
5320     b_oth->free_ij = PETSC_TRUE;
5321     b_oth->nonew   = 0;
5322 
5323     ierr = PetscFree(bufj);CHKERRQ(ierr);
5324     if (!startsj_s || !bufa_ptr){
5325       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5326       ierr = PetscFree(bufa);CHKERRQ(ierr);
5327     } else {
5328       *startsj_s = sstartsj;
5329       *startsj_r = rstartsj;
5330       *bufa_ptr  = bufa;
5331     }
5332   }
5333   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5334   PetscFunctionReturn(0);
5335 }
5336 
5337 #undef __FUNCT__
5338 #define __FUNCT__ "MatGetCommunicationStructs"
5339 /*@C
5340   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5341 
5342   Not Collective
5343 
5344   Input Parameter:
5345 . A - The matrix in MPIAIJ format
5346 
5347   Output Parameters:
5348 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5349 . colmap - A map from global column index to local index into lvec
5350 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5351 
5352   Level: developer
5353 
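  Example usage (a minimal sketch; it assumes PETSc was configured without PETSC_USE_CTABLE, so colmap is a
  PetscInt array, that A is an assembled MATMPIAIJ matrix, and that x is a vector whose parallel layout matches
  the columns of A):
.vb
   Vec        lvec;
   VecScatter Mvctx;
   PetscInt   *colmap;
   ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
   /* gather the needed off-process entries of x into lvec, as MatMult() does internally */
   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
.ve
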
5354 @*/
5355 #if defined (PETSC_USE_CTABLE)
5356 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5357 #else
5358 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5359 #endif
5360 {
5361   Mat_MPIAIJ *a;
5362 
5363   PetscFunctionBegin;
5364   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5365   PetscValidPointer(lvec, 2);
5366   PetscValidPointer(colmap, 3);
5367   PetscValidPointer(multScatter, 4);
5368   a = (Mat_MPIAIJ *) A->data;
5369   if (lvec) *lvec = a->lvec;
5370   if (colmap) *colmap = a->colmap;
5371   if (multScatter) *multScatter = a->Mvctx;
5372   PetscFunctionReturn(0);
5373 }
5374 
5375 EXTERN_C_BEGIN
5376 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5377 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5378 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5379 EXTERN_C_END
5380 
5381 #undef __FUNCT__
5382 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5383 /*
5384     Computes C = A*B as (B'*A')' since computing the MPIDense*MPIAIJ product directly is untenable,
5385     while the MPIAIJ*MPIDense product B'*A' is available
5385 
5386                n                       p                          p
5387         (              )       (              )         (                  )
5388       m (      A       )  *  n (       B      )   =   m (         C        )
5389         (              )       (              )         (                  )
5390 
5391 */
5392 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5393 {
5394   PetscErrorCode     ierr;
5395   Mat                At,Bt,Ct;
5396 
5397   PetscFunctionBegin;
5398   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5399   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5400   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5401   ierr = MatDestroy(&At);CHKERRQ(ierr);
5402   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5403   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5404   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5405   PetscFunctionReturn(0);
5406 }
5407 
5408 #undef __FUNCT__
5409 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5410 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5411 {
5412   PetscErrorCode ierr;
5413   PetscInt       m=A->rmap->n,n=B->cmap->n;
5414   Mat            Cmat;
5415 
5416   PetscFunctionBegin;
5417   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5418   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5419   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5420   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5421   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5422   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5423   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5424   *C   = Cmat;
5425   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5426   PetscFunctionReturn(0);
5427 }
5428 
5429 /* ----------------------------------------------------------------*/
5430 #undef __FUNCT__
5431 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5432 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5433 {
5434   PetscErrorCode ierr;
5435 
5436   PetscFunctionBegin;
5437   if (scall == MAT_INITIAL_MATRIX){
5438     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5439   }
5440   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5441   PetscFunctionReturn(0);
5442 }
5443 
5444 EXTERN_C_BEGIN
5445 #if defined(PETSC_HAVE_MUMPS)
5446 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5447 #endif
5448 #if defined(PETSC_HAVE_PASTIX)
5449 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5450 #endif
5451 #if defined(PETSC_HAVE_SUPERLU_DIST)
5452 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5453 #endif
5454 #if defined(PETSC_HAVE_SPOOLES)
5455 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5456 #endif
5457 EXTERN_C_END
5458 
5459 /*MC
5460    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5461 
5462    Options Database Keys:
5463 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5464 
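  Example usage (a minimal sketch; comm, the sizes m,n,M,N, and the preallocation counts d_nz,o_nz are
  placeholders supplied by the caller):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
   /* ... MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve
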
5465   Level: beginner
5466 
5467 .seealso: MatCreateAIJ()
5468 M*/
5469 
5470 EXTERN_C_BEGIN
5471 #undef __FUNCT__
5472 #define __FUNCT__ "MatCreate_MPIAIJ"
5473 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5474 {
5475   Mat_MPIAIJ     *b;
5476   PetscErrorCode ierr;
5477   PetscMPIInt    size;
5478 
5479   PetscFunctionBegin;
5480   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5481 
5482   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5483   B->data         = (void*)b;
5484   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5485   B->rmap->bs     = 1;
5486   B->assembled    = PETSC_FALSE;
5487 
5488   B->insertmode   = NOT_SET_VALUES;
5489   b->size         = size;
5490   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5491 
5492   /* build cache for off-processor entries formed during assembly */
5493   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5494   b->donotstash  = PETSC_FALSE;
5495   b->colmap      = 0;
5496   b->garray      = 0;
5497   b->roworiented = PETSC_TRUE;
5498 
5499   /* stuff used for matrix vector multiply */
5500   b->lvec      = PETSC_NULL;
5501   b->Mvctx     = PETSC_NULL;
5502 
5503   /* stuff for MatGetRow() */
5504   b->rowindices   = 0;
5505   b->rowvalues    = 0;
5506   b->getrowactive = PETSC_FALSE;
5507 
5508 #if defined(PETSC_HAVE_SPOOLES)
5509   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5510                                      "MatGetFactor_mpiaij_spooles",
5511                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5512 #endif
5513 #if defined(PETSC_HAVE_MUMPS)
5514   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5515                                      "MatGetFactor_aij_mumps",
5516                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5517 #endif
5518 #if defined(PETSC_HAVE_PASTIX)
5519   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5520 					   "MatGetFactor_mpiaij_pastix",
5521 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5522 #endif
5523 #if defined(PETSC_HAVE_SUPERLU_DIST)
5524   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5525                                      "MatGetFactor_mpiaij_superlu_dist",
5526                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5527 #endif
5528   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5529                                      "MatStoreValues_MPIAIJ",
5530                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5531   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5532                                      "MatRetrieveValues_MPIAIJ",
5533                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5534   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5535 				     "MatGetDiagonalBlock_MPIAIJ",
5536                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5537   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5538 				     "MatIsTranspose_MPIAIJ",
5539 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5540   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5541 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5542 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5543   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5544 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5545 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5546   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5547 				     "MatDiagonalScaleLocal_MPIAIJ",
5548 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5549   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5550                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5551                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5552   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5553                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5554                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5555   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5556                                      "MatConvert_MPIAIJ_MPISBAIJ",
5557                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5558   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5559                                      "MatMatMult_MPIDense_MPIAIJ",
5560                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5561   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5562                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5563                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5564   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5565                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5566                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5567   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5568   PetscFunctionReturn(0);
5569 }
5570 EXTERN_C_END
5571 
5572 #undef __FUNCT__
5573 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5574 /*@
5575      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5576          and "off-diagonal" part of the matrix in CSR format.
5577 
5578    Collective on MPI_Comm
5579 
5580    Input Parameters:
5581 +  comm - MPI communicator
5582 .  m - number of local rows (cannot be PETSC_DECIDE)
5583 .  n - This value should be the same as the local size used in creating the
5584        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5585        calculated if N is given). For square matrices n is almost always m.
5586 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5587 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5588 .  i - row indices for "diagonal" portion of matrix
5589 .  j - column indices for "diagonal" portion of matrix
5590 .  a - matrix values for "diagonal" portion of matrix
5591 .  oi - row indices for "off-diagonal" portion of matrix
5592 .  oj - column indices for "off-diagonal" portion of matrix
5593 -  oa - matrix values for "off-diagonal" portion of matrix
5594 
5595    Output Parameter:
5596 .   mat - the matrix
5597 
5598    Level: advanced
5599 
5600    Notes:
5601        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5602        must free the arrays once the matrix has been destroyed and not before.
5603 
5604        The i and j indices are 0 based
5605 
5606        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5607 
5608        This sets local rows and cannot be used to set off-processor values.
5609 
5610        You cannot later use MatSetValues() to change values in this matrix.
5611 
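       Example usage: the sketch below builds the 2x2 matrix [2 -1; -1 2] on exactly two processes, with one row
       and one column per process. It assumes the "diagonal" column indices j are local (0 .. n-1) while the
       "off-diagonal" column indices oj are global, which is what the internal MatCreateSeqAIJWithArrays() calls imply.
.vb
   PetscInt    i[2]  = {0,1}, j[1]  = {0};    /* one "diagonal" entry in the single local row */
   PetscInt    oi[2] = {0,1}, oj[1];          /* one "off-diagonal" entry in the single local row */
   PetscScalar a[1]  = {2.0}, oa[1] = {-1.0};
   PetscMPIInt rank;
   Mat         A;

   ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
   oj[0] = rank ? 0 : 1;                      /* global column of the off-process entry */
   ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
   /* the six arrays must stay valid until MatDestroy(&A); see the Notes above */
.ve
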
5612 .keywords: matrix, aij, compressed row, sparse, parallel
5613 
5614 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5615           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5616 @*/
5617 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5618 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5619 {
5620   PetscErrorCode ierr;
5621   Mat_MPIAIJ     *maij;
5622 
5623   PetscFunctionBegin;
5624   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5625   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5626   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5627   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5628   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5629   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5630   maij = (Mat_MPIAIJ*) (*mat)->data;
5631   maij->donotstash     = PETSC_TRUE;
5632   (*mat)->preallocated = PETSC_TRUE;
5633 
5634   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5635   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5636   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5637   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5638 
5639   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5640   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5641 
5642   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5643   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5644   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5645   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5646 
5647   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5648   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5649   PetscFunctionReturn(0);
5650 }
5651 
5652 /*
5653     Special version for direct calls from Fortran
5654 */
5655 #include <private/fortranimpl.h>
5656 
5657 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5658 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5659 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5660 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5661 #endif
5662 
5663 /* Redefine these macros so that they can be used in a void function */
5664 #undef CHKERRQ
5665 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5666 #undef SETERRQ2
5667 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5668 #undef SETERRQ3
5669 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5670 #undef SETERRQ
5671 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5672 
5673 EXTERN_C_BEGIN
5674 #undef __FUNCT__
5675 #define __FUNCT__ "matsetvaluesmpiaij_"
5676 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5677 {
5678   Mat             mat = *mmat;
5679   PetscInt        m = *mm, n = *mn;
5680   InsertMode      addv = *maddv;
5681   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5682   PetscScalar     value;
5683   PetscErrorCode  ierr;
5684 
5685   MatCheckPreallocated(mat,1);
5686   if (mat->insertmode == NOT_SET_VALUES) {
5687     mat->insertmode = addv;
5688   }
5689 #if defined(PETSC_USE_DEBUG)
5690   else if (mat->insertmode != addv) {
5691     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5692   }
5693 #endif
5694   {
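  /* a new scope is opened here so that the working variables needed by the MatSetValues_SeqAIJ_A/B_Private()
     macros can be declared after the executable statements above (C89 requires declarations to precede
     statements within a block) */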
5695   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5696   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5697   PetscBool       roworiented = aij->roworiented;
5698 
5699   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
5700   Mat             A = aij->A;
5701   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5702   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5703   MatScalar       *aa = a->a;
5704   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5705   Mat             B = aij->B;
5706   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5707   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5708   MatScalar       *ba = b->a;
5709 
5710   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5711   PetscInt        nonew = a->nonew;
5712   MatScalar       *ap1,*ap2;
5713 
5714   PetscFunctionBegin;
5715   for (i=0; i<m; i++) {
5716     if (im[i] < 0) continue;
5717 #if defined(PETSC_USE_DEBUG)
5718     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5719 #endif
5720     if (im[i] >= rstart && im[i] < rend) {
5721       row      = im[i] - rstart;
5722       lastcol1 = -1;
5723       rp1      = aj + ai[row];
5724       ap1      = aa + ai[row];
5725       rmax1    = aimax[row];
5726       nrow1    = ailen[row];
5727       low1     = 0;
5728       high1    = nrow1;
5729       lastcol2 = -1;
5730       rp2      = bj + bi[row];
5731       ap2      = ba + bi[row];
5732       rmax2    = bimax[row];
5733       nrow2    = bilen[row];
5734       low2     = 0;
5735       high2    = nrow2;
5736 
5737       for (j=0; j<n; j++) {
5738         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5739         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5740         if (in[j] >= cstart && in[j] < cend){
5741           col = in[j] - cstart;
5742           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5743         } else if (in[j] < 0) continue;
5744 #if defined(PETSC_USE_DEBUG)
5745         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5746 #endif
5747         else {
5748           if (mat->was_assembled) {
5749             if (!aij->colmap) {
5750               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5751             }
5752 #if defined (PETSC_USE_CTABLE)
5753             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5754 	    col--;
5755 #else
5756             col = aij->colmap[in[j]] - 1;
5757 #endif
5758             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5759               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5760               col =  in[j];
5761               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5762               B = aij->B;
5763               b = (Mat_SeqAIJ*)B->data;
5764               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5765               rp2      = bj + bi[row];
5766               ap2      = ba + bi[row];
5767               rmax2    = bimax[row];
5768               nrow2    = bilen[row];
5769               low2     = 0;
5770               high2    = nrow2;
5771               bm       = aij->B->rmap->n;
5772               ba = b->a;
5773             }
5774           } else col = in[j];
5775           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5776         }
5777       }
5778     } else {
5779       if (!aij->donotstash) {
5780         if (roworiented) {
5781           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5782         } else {
5783           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5784         }
5785       }
5786     }
5787   }}
5788   PetscFunctionReturnVoid();
5789 }
5790 EXTERN_C_END
5791 
5792