xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision dcca6d9d80ebd869fe6029bd05a3aa9faafef49e)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to
20    using inodes when enough of them (consecutive rows with identical nonzero structure) exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
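
/*
   Example usage (an illustrative sketch, not part of the manual page above; the local sizes
   m,n and the preallocation estimates d_nz,o_nz are placeholders supplied by the caller):
   create an AIJ matrix and call both preallocation routines as recommended, so the same
   code runs unchanged on one or on many processes.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,d_nz,NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
      ... fill with MatSetValues() ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/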
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
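
/*
   Example usage of the option above (a sketch; m,n,d_nz,o_nz are placeholders): when the
   type is left to MatSetFromOptions() rather than hard-wired with MatSetType(), running
   with -mat_type aijcrl selects this format at runtime.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,d_nz,NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
*/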
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
138   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
139   if (type == NORM_2) {
140     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142     }
143     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145     }
146   } else if (type == NORM_1) {
147     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149     }
150     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152     }
153   } else if (type == NORM_INFINITY) {
154     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156     }
157     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159     }
160 
161   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162   if (type == NORM_INFINITY) {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
164   } else {
165     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
166   }
167   ierr = PetscFree(work);CHKERRQ(ierr);
168   if (type == NORM_2) {
169     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDistribute_MPIAIJ"
176 /*
177     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179 
180     Only for square matrices
181 
182     Used by a preconditioner, hence PETSC_EXTERN
183 */
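/*
   Typical call pattern (a sketch only): gmat is a MATSEQAIJ matrix held on process 0 and
   m is the number of local rows chosen by the caller on each process; the first call builds
   the distributed matrix, later calls with MAT_REUSE_MATRIX only move over new numerical values.

      Mat dmat;
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
      ...
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/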
184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185 {
186   PetscMPIInt    rank,size;
187   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
188   PetscErrorCode ierr;
189   Mat            mat;
190   Mat_SeqAIJ     *gmata;
191   PetscMPIInt    tag;
192   MPI_Status     status;
193   PetscBool      aij;
194   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
195 
196   PetscFunctionBegin;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* determine the numbers of diagonal and off-diagonal entries in each row */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* determine the numbers of diagonal and off-diagonal entries in each row */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0 */
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /*
352   Local utility routine that creates a mapping from the global column
353 number to the local number in the off-diagonal part of the local
354 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
355 a slightly higher hash table cost; without it, it is not scalable (each process
356 holds an order N integer array) but access is fast.
357 */
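/*
   Lookup sketch (illustration only, mirroring how MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()
   below use the colmap): the local off-diagonal index lcol of a global column gcol is recovered with

      #if defined(PETSC_USE_CTABLE)
        ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
        lcol--;
      #else
        lcol = aij->colmap[gcol] - 1;
      #endif

   where lcol < 0 means that gcol does not occur in the off-diagonal part owned by this process.
*/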
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361 {
362   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       n = aij->B->cmap->n,i;
365 
366   PetscFunctionBegin;
367   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368 #if defined(PETSC_USE_CTABLE)
369   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
370   for (i=0; i<n; i++) {
371     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
372   }
373 #else
374   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
375   ierr = PetscLogObjectMemory((PetscObject)mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378 #endif
379   PetscFunctionReturn(0);
380 }
381 
382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383 { \
384     if (col <= lastcol1)  low1 = 0;     \
385     else                 high1 = nrow1; \
386     lastcol1 = col;\
387     while (high1-low1 > 5) { \
388       t = (low1+high1)/2; \
389       if (rp1[t] > col) high1 = t; \
390       else              low1  = t; \
391     } \
392       for (_i=low1; _i<high1; _i++) { \
393         if (rp1[_i] > col) break; \
394         if (rp1[_i] == col) { \
395           if (addv == ADD_VALUES) ap1[_i] += value;   \
396           else                    ap1[_i] = value; \
397           goto a_noinsert; \
398         } \
399       }  \
400       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
402       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404       N = nrow1++ - 1; a->nz++; high1++; \
405       /* shift up all the later entries in this row */ \
406       for (ii=N; ii>=_i; ii--) { \
407         rp1[ii+1] = rp1[ii]; \
408         ap1[ii+1] = ap1[ii]; \
409       } \
410       rp1[_i] = col;  \
411       ap1[_i] = value;  \
412       a_noinsert: ; \
413       ailen[row] = nrow1; \
414 }
415 
416 
417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418   { \
419     if (col <= lastcol2) low2 = 0;                        \
420     else high2 = nrow2;                                   \
421     lastcol2 = col;                                       \
422     while (high2-low2 > 5) {                              \
423       t = (low2+high2)/2;                                 \
424       if (rp2[t] > col) high2 = t;                        \
425       else             low2  = t;                         \
426     }                                                     \
427     for (_i=low2; _i<high2; _i++) {                       \
428       if (rp2[_i] > col) break;                           \
429       if (rp2[_i] == col) {                               \
430         if (addv == ADD_VALUES) ap2[_i] += value;         \
431         else                    ap2[_i] = value;          \
432         goto b_noinsert;                                  \
433       }                                                   \
434     }                                                     \
435     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
437     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439     N = nrow2++ - 1; b->nz++; high2++;                    \
440     /* shift up all the later entries in this row */      \
441     for (ii=N; ii>=_i; ii--) {                            \
442       rp2[ii+1] = rp2[ii];                                \
443       ap2[ii+1] = ap2[ii];                                \
444     }                                                     \
445     rp2[_i] = col;                                        \
446     ap2[_i] = value;                                      \
447     b_noinsert: ;                                         \
448     bilen[row] = nrow2;                                   \
449   }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454 {
455   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
456   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
457   PetscErrorCode ierr;
458   PetscInt       l,*garray = mat->garray,diag;
459 
460   PetscFunctionBegin;
461   /* code only works for square matrices A */
462 
463   /* find size of row to the left of the diagonal part */
464   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
465   row  = row - diag;
466   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467     if (garray[b->j[b->i[row]+l]] > diag) break;
468   }
469   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* diagonal part */
472   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
473 
474   /* right of diagonal part */
475   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatSetValues_MPIAIJ"
481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   PetscScalar    value;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
487   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488   PetscBool      roworiented = aij->roworiented;
489 
490   /* Some Variables required in the macro */
491   Mat        A                 = aij->A;
492   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
493   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494   MatScalar  *aa               = a->a;
495   PetscBool  ignorezeroentries = a->ignorezeroentries;
496   Mat        B                 = aij->B;
497   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
498   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499   MatScalar  *ba               = b->a;
500 
501   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502   PetscInt  nonew;
503   MatScalar *ap1,*ap2;
504 
505   PetscFunctionBegin;
506   if (v) PetscValidScalarPointer(v,6);
507   for (i=0; i<m; i++) {
508     if (im[i] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511 #endif
512     if (im[i] >= rstart && im[i] < rend) {
513       row      = im[i] - rstart;
514       lastcol1 = -1;
515       rp1      = aj + ai[row];
516       ap1      = aa + ai[row];
517       rmax1    = aimax[row];
518       nrow1    = ailen[row];
519       low1     = 0;
520       high1    = nrow1;
521       lastcol2 = -1;
522       rp2      = bj + bi[row];
523       ap2      = ba + bi[row];
524       rmax2    = bimax[row];
525       nrow2    = bilen[row];
526       low2     = 0;
527       high2    = nrow2;
528 
529       for (j=0; j<n; j++) {
530         if (v) {
531           if (roworiented) value = v[i*n+j];
532           else             value = v[i+j*m];
533         } else value = 0.0;
534         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535         if (in[j] >= cstart && in[j] < cend) {
536           col   = in[j] - cstart;
537           nonew = a->nonew;
538           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539         } else if (in[j] < 0) continue;
540 #if defined(PETSC_USE_DEBUG)
541         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542 #endif
543         else {
544           if (mat->was_assembled) {
545             if (!aij->colmap) {
546               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
547             }
548 #if defined(PETSC_USE_CTABLE)
549             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
550             col--;
551 #else
552             col = aij->colmap[in[j]] - 1;
553 #endif
554             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
556               col  =  in[j];
557               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558               B     = aij->B;
559               b     = (Mat_SeqAIJ*)B->data;
560               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561               rp2   = bj + bi[row];
562               ap2   = ba + bi[row];
563               rmax2 = bimax[row];
564               nrow2 = bilen[row];
565               low2  = 0;
566               high2 = nrow2;
567               bm    = aij->B->rmap->n;
568               ba    = b->a;
569             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570           } else col = in[j];
571           nonew = b->nonew;
572           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573         }
574       }
575     } else {
576       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577       if (!aij->donotstash) {
578         mat->assembled = PETSC_FALSE;
579         if (roworiented) {
580           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         } else {
582           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
583         }
584       }
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatGetValues_MPIAIJ"
592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593 {
594   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
595   PetscErrorCode ierr;
596   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
598 
599   PetscFunctionBegin;
600   for (i=0; i<m; i++) {
601     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603     if (idxm[i] >= rstart && idxm[i] < rend) {
604       row = idxm[i] - rstart;
605       for (j=0; j<n; j++) {
606         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608         if (idxn[j] >= cstart && idxn[j] < cend) {
609           col  = idxn[j] - cstart;
610           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611         } else {
612           if (!aij->colmap) {
613             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
614           }
615 #if defined(PETSC_USE_CTABLE)
616           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
617           col--;
618 #else
619           col = aij->colmap[idxn[j]] - 1;
620 #endif
621           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622           else {
623             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
624           }
625         }
626       }
627     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       nstash,reallocs;
641   InsertMode     addv;
642 
643   PetscFunctionBegin;
644   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
645 
646   /* make sure all processors are either in INSERTMODE or ADDMODE */
647   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649   mat->insertmode = addv; /* in case this processor had no cache */
650 
651   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
652   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
653   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
663   PetscErrorCode ierr;
664   PetscMPIInt    n;
665   PetscInt       i,j,rstart,ncols,flg;
666   PetscInt       *row,*col;
667   PetscBool      other_disassembled;
668   PetscScalar    *val;
669   InsertMode     addv = mat->insertmode;
670 
671   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
672 
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n; ) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) {
682           if (row[j] != rstart) break;
683         }
684         if (j < n) ncols = j-i;
685         else       ncols = n-i;
686         /* Now assemble all these values with a single function call */
687         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
688 
689         i = j;
690       }
691     }
692     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
693   }
694   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
695   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
696 
697   /* determine if any processor has disassembled, if so we must
698      also disassemble ourselves, in order that we may reassemble. */
699   /*
700      if nonzero structure of submatrix B cannot change then we know that
701      no processor disassembled thus we can skip this stuff
702   */
703   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
705     if (mat->was_assembled && !other_disassembled) {
706       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
707     }
708   }
709   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
711   }
712   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
714   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
715 
716   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
717 
718   aij->rowvalues = 0;
719 
720   /* used by MatAXPY() */
721   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
722   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
723 
724   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
725   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
731 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
732 {
733   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
734   PetscErrorCode ierr;
735 
736   PetscFunctionBegin;
737   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
738   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatZeroRows_MPIAIJ"
744 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
745 {
746   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
747   PetscInt      *owners = A->rmap->range;
748   PetscInt       n      = A->rmap->n;
749   PetscMPIInt    size   = mat->size;
750   PetscSF        sf;
751   PetscInt      *lrows;
752   PetscSFNode   *rrows;
753   PetscInt       lastidx = -1, r, p = 0, len = 0;
754   PetscErrorCode ierr;
755 
756   PetscFunctionBegin;
757   /* Create SF where leaves are input rows and roots are owned rows */
758   ierr = PetscMalloc(n * sizeof(PetscInt), &lrows);CHKERRQ(ierr);
759   for (r = 0; r < n; ++r) lrows[r] = -1;
760   ierr = PetscMalloc(N * sizeof(PetscSFNode), &rrows);CHKERRQ(ierr);
761   for (r = 0; r < N; ++r) {
762     const PetscInt idx   = rows[r];
763     PetscBool      found = PETSC_FALSE;
764     /* Trick for efficient searching for sorted rows */
765     if (lastidx > idx) p = 0;
766     lastidx = idx;
767     for (; p < size; ++p) {
768       if (idx >= owners[p] && idx < owners[p+1]) {
769         rrows[r].rank  = p;
770         rrows[r].index = rows[r] - owners[p];
771         found = PETSC_TRUE;
772         break;
773       }
774     }
775     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %D not found in matrix distribution", idx);
776   }
777   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
778   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
779   /* Collect flags for rows to be zeroed */
780   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
781   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
782   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
783   /* Compress and put in row numbers */
784   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
785   /* fix right hand side if needed */
786   if (x && b) {
787     const PetscScalar *xx;
788     PetscScalar       *bb;
789 
790     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
791     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
792     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
793     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
794     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
795   }
796   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
797   ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
798   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
799     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
800   } else if (diag != 0.0) {
801     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
802     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
803     for (r = 0; r < len; ++r) {
804       const PetscInt row = lrows[r] + A->rmap->rstart;
805       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
806     }
807     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
808     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
809   } else {
810     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
811   }
812   ierr = PetscFree(lrows);CHKERRQ(ierr);
813   PetscFunctionReturn(0);
814 }
815 
816 #undef __FUNCT__
817 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
818 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
819 {
820   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
821   PetscErrorCode    ierr;
822   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
823   PetscInt          i,*owners = A->rmap->range;
824   PetscInt          *nprocs,j,idx,nsends;
825   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
826   PetscInt          *rvalues,count,base,slen,*source;
827   PetscInt          *lens,*lrows,*values,m;
828   MPI_Comm          comm;
829   MPI_Request       *send_waits,*recv_waits;
830   MPI_Status        recv_status,*send_status;
831   const PetscScalar *xx;
832   PetscScalar       *bb,*mask;
833   Vec               xmask,lmask;
834   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
835   const PetscInt    *aj, *ii,*ridx;
836   PetscScalar       *aa;
837 #if defined(PETSC_DEBUG)
838   PetscBool found = PETSC_FALSE;
839 #endif
840 
841   PetscFunctionBegin;
842   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
843   /*  first count number of contributors to each processor */
844   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
845   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
846   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
847   j    = 0;
848   for (i=0; i<N; i++) {
849     if (lastidx > (idx = rows[i])) j = 0;
850     lastidx = idx;
851     for (; j<size; j++) {
852       if (idx >= owners[j] && idx < owners[j+1]) {
853         nprocs[2*j]++;
854         nprocs[2*j+1] = 1;
855         owner[i]      = j;
856 #if defined(PETSC_DEBUG)
857         found = PETSC_TRUE;
858 #endif
859         break;
860       }
861     }
862 #if defined(PETSC_DEBUG)
863     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
864     found = PETSC_FALSE;
865 #endif
866   }
867   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
868 
869   /* inform other processors of number of messages and max length*/
870   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
871 
872   /* post receives:   */
873   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
874   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
875   for (i=0; i<nrecvs; i++) {
876     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
877   }
878 
879   /* do sends:
880       1) starts[i] gives the starting index in svalues for stuff going to
881          the ith processor
882   */
883   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
884   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
885   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
886 
887   starts[0] = 0;
888   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
889   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
890 
891   starts[0] = 0;
892   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
893   count = 0;
894   for (i=0; i<size; i++) {
895     if (nprocs[2*i+1]) {
896       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
897     }
898   }
899   ierr = PetscFree(starts);CHKERRQ(ierr);
900 
901   base = owners[rank];
902 
903   /*  wait on receives */
904   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
905   count = nrecvs; slen = 0;
906   while (count) {
907     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
908     /* unpack receives into our local space */
909     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
910 
911     source[imdex] = recv_status.MPI_SOURCE;
912     lens[imdex]   = n;
913     slen         += n;
914     count--;
915   }
916   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
917 
918   /* move the data into the send scatter */
919   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
920   count = 0;
921   for (i=0; i<nrecvs; i++) {
922     values = rvalues + i*nmax;
923     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
924   }
925   ierr = PetscFree(rvalues);CHKERRQ(ierr);
926   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
927   ierr = PetscFree(owner);CHKERRQ(ierr);
928   ierr = PetscFree(nprocs);CHKERRQ(ierr);
929   /* lrows are the local rows to be zeroed, slen is the number of local rows */
930 
931   /* zero diagonal part of matrix */
932   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
933 
934   /* handle off diagonal part of matrix */
935   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
936   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
937   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
938   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
939   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
940   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
941   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
942   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
943   if (x) {
944     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
947     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
948   }
949   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
950 
951   /* remove zeroed rows of off diagonal matrix */
952   ii = aij->i;
953   for (i=0; i<slen; i++) {
954     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
955   }
956 
957   /* loop over all elements of off process part of matrix zeroing removed columns*/
958   if (aij->compressedrow.use) {
959     m    = aij->compressedrow.nrows;
960     ii   = aij->compressedrow.i;
961     ridx = aij->compressedrow.rindex;
962     for (i=0; i<m; i++) {
963       n  = ii[i+1] - ii[i];
964       aj = aij->j + ii[i];
965       aa = aij->a + ii[i];
966 
967       for (j=0; j<n; j++) {
968         if (PetscAbsScalar(mask[*aj])) {
969           if (b) bb[*ridx] -= *aa*xx[*aj];
970           *aa = 0.0;
971         }
972         aa++;
973         aj++;
974       }
975       ridx++;
976     }
977   } else { /* do not use compressed row format */
978     m = l->B->rmap->n;
979     for (i=0; i<m; i++) {
980       n  = ii[i+1] - ii[i];
981       aj = aij->j + ii[i];
982       aa = aij->a + ii[i];
983       for (j=0; j<n; j++) {
984         if (PetscAbsScalar(mask[*aj])) {
985           if (b) bb[i] -= *aa*xx[*aj];
986           *aa = 0.0;
987         }
988         aa++;
989         aj++;
990       }
991     }
992   }
993   if (x) {
994     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
995     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
996   }
997   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
998   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
999   ierr = PetscFree(lrows);CHKERRQ(ierr);
1000 
1001   /* wait on sends */
1002   if (nsends) {
1003     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1004     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1005     ierr = PetscFree(send_status);CHKERRQ(ierr);
1006   }
1007   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1008   ierr = PetscFree(svalues);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatMult_MPIAIJ"
1014 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1015 {
1016   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1017   PetscErrorCode ierr;
1018   PetscInt       nt;
1019 
1020   PetscFunctionBegin;
1021   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1022   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1023   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1024   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1025   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1032 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1033 {
1034   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1035   PetscErrorCode ierr;
1036 
1037   PetscFunctionBegin;
1038   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1044 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1045 {
1046   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1047   PetscErrorCode ierr;
1048 
1049   PetscFunctionBegin;
1050   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1051   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1052   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 #undef __FUNCT__
1058 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1059 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1060 {
1061   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1062   PetscErrorCode ierr;
1063   PetscBool      merged;
1064 
1065   PetscFunctionBegin;
1066   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1067   /* do nondiagonal part */
1068   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1069   if (!merged) {
1070     /* send it on its way */
1071     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1072     /* do local part */
1073     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1074     /* receive remote parts: note this assumes the values are not actually */
1075   /* added in yy until the next line */
1076     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1077   } else {
1078     /* do local part */
1079     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1080     /* send it on its way */
1081     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1082     /* values actually were received in the Begin() but we need to call this nop */
1083     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1084   }
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1090 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1091 {
1092   MPI_Comm       comm;
1093   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1094   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1095   IS             Me,Notme;
1096   PetscErrorCode ierr;
1097   PetscInt       M,N,first,last,*notme,i;
1098   PetscMPIInt    size;
1099 
1100   PetscFunctionBegin;
1101   /* Easy test: symmetric diagonal block */
1102   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1103   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1104   if (!*f) PetscFunctionReturn(0);
1105   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1106   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1107   if (size == 1) PetscFunctionReturn(0);
1108 
1109   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1110   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1111   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1112   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1113   for (i=0; i<first; i++) notme[i] = i;
1114   for (i=last; i<M; i++) notme[i-last+first] = i;
1115   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1116   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1117   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1118   Aoff = Aoffs[0];
1119   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1120   Boff = Boffs[0];
1121   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1122   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1123   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1124   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1125   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1126   ierr = PetscFree(notme);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1132 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   /* do nondiagonal part */
1139   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1140   /* send it on its way */
1141   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1142   /* do local part */
1143   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1144   /* receive remote parts */
1145   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 /*
1150   This only works correctly for square matrices where the subblock A->A is the
1151    diagonal block
1152 */
1153 #undef __FUNCT__
1154 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1155 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1156 {
1157   PetscErrorCode ierr;
1158   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1159 
1160   PetscFunctionBegin;
1161   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1162   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1163   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatScale_MPIAIJ"
1169 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1170 {
1171   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1172   PetscErrorCode ierr;
1173 
1174   PetscFunctionBegin;
1175   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1176   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 #undef __FUNCT__
1181 #define __FUNCT__ "MatDestroy_MPIAIJ"
1182 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1183 {
1184   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1185   PetscErrorCode ierr;
1186 
1187   PetscFunctionBegin;
1188 #if defined(PETSC_USE_LOG)
1189   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1190 #endif
1191   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1192   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1193   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1194   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1195 #if defined(PETSC_USE_CTABLE)
1196   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1197 #else
1198   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1199 #endif
1200   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1201   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1202   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1203   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1204   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1205   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1206 
1207   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1208   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1209   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1210   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1211   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1212   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1213   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 #undef __FUNCT__
1220 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1221 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1222 {
1223   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1224   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1225   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1226   PetscErrorCode ierr;
1227   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1228   int            fd;
1229   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1230   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1231   PetscScalar    *column_values;
1232   PetscInt       message_count,flowcontrolcount;
1233   FILE           *file;
1234 
1235   PetscFunctionBegin;
1236   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1237   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1238   nz   = A->nz + B->nz;
1239   if (!rank) {
1240     header[0] = MAT_FILE_CLASSID;
1241     header[1] = mat->rmap->N;
1242     header[2] = mat->cmap->N;
1243 
1244     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1245     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1246     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1247     /* get largest number of rows any processor has */
1248     rlen  = mat->rmap->n;
1249     range = mat->rmap->range;
1250     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1251   } else {
1252     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1253     rlen = mat->rmap->n;
1254   }
1255 
1256   /* load up the local row counts */
1257   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1258   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1259 
1260   /* store the row lengths to the file */
1261   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1262   if (!rank) {
1263     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1264     for (i=1; i<size; i++) {
1265       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1266       rlen = range[i+1] - range[i];
1267       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1268       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1269     }
1270     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1271   } else {
1272     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1273     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1274     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1275   }
1276   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1277 
1278   /* load up the local column indices */
1279   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
1280   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1281   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1282   cnt   = 0;
1283   for (i=0; i<mat->rmap->n; i++) {
1284     for (j=B->i[i]; j<B->i[i+1]; j++) {
1285       if ((col = garray[B->j[j]]) > cstart) break;
1286       column_indices[cnt++] = col;
1287     }
1288     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1289     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1290   }
1291   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1292 
1293   /* store the column indices to the file */
1294   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1295   if (!rank) {
1296     MPI_Status status;
1297     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1298     for (i=1; i<size; i++) {
1299       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1300       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1301       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1302       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1303       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1304     }
1305     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1306   } else {
1307     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1308     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1309     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1310     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1311   }
1312   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1313 
1314   /* load up the local column values */
1315   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1316   cnt  = 0;
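  /* The values are written in exactly the same merged order as the column indices above, so the
     k-th value stored corresponds to the k-th column index. */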
1317   for (i=0; i<mat->rmap->n; i++) {
1318     for (j=B->i[i]; j<B->i[i+1]; j++) {
1319       if (garray[B->j[j]] > cstart) break;
1320       column_values[cnt++] = B->a[j];
1321     }
1322     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1323     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1324   }
1325   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1326 
1327   /* store the column values to the file */
1328   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1329   if (!rank) {
1330     MPI_Status status;
1331     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1332     for (i=1; i<size; i++) {
1333       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1334       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1335       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1336       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1337       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1338     }
1339     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1340   } else {
1341     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1342     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1343     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1344     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1345   }
1346   ierr = PetscFree(column_values);CHKERRQ(ierr);
1347 
1348   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1349   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #include <petscdraw.h>
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
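/*
   Illustrative usage sketch (an assumption about a typical caller, not taken from this file):

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   MatView() on a MATMPIAIJ matrix reaches the routine below for ASCII, binary, draw, and socket
   viewers; for ASCII viewers it branches on the requested format.
*/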
1356 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1357 {
1358   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1359   PetscErrorCode    ierr;
1360   PetscMPIInt       rank = aij->rank,size = aij->size;
1361   PetscBool         isdraw,iascii,isbinary;
1362   PetscViewer       sviewer;
1363   PetscViewerFormat format;
1364 
1365   PetscFunctionBegin;
1366   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1367   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1368   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1369   if (iascii) {
1370     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1371     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1372       MatInfo   info;
1373       PetscBool inodes;
1374 
1375       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1376       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1377       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1378       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1379       if (!inodes) {
1380         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1381                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1382       } else {
1383         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1384                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1385       }
1386       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1387       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1388       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1389       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1390       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1391       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1392       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1393       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1394       PetscFunctionReturn(0);
1395     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1396       PetscInt inodecount,inodelimit,*inodes;
1397       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1398       if (inodes) {
1399         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1400       } else {
1401         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1402       }
1403       PetscFunctionReturn(0);
1404     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1405       PetscFunctionReturn(0);
1406     }
1407   } else if (isbinary) {
1408     if (size == 1) {
1409       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1410       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1411     } else {
1412       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1413     }
1414     PetscFunctionReturn(0);
1415   } else if (isdraw) {
1416     PetscDraw draw;
1417     PetscBool isnull;
1418     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1419     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1420   }
1421 
1422   if (size == 1) {
1423     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1424     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1425   } else {
1426     /* assemble the entire matrix onto first processor. */
1427     Mat        A;
1428     Mat_SeqAIJ *Aloc;
1429     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1430     MatScalar  *a;
1431 
1432     if (mat->rmap->N > 1024) {
1433       PetscBool flg = PETSC_FALSE;
1434 
1435       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1436       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1437     }
1438 
1439     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1440     if (!rank) {
1441       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1442     } else {
1443       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1444     }
1445     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1446     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1447     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1448     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1449     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1450 
1451     /* copy over the A part */
1452     Aloc = (Mat_SeqAIJ*)aij->A->data;
1453     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1454     row  = mat->rmap->rstart;
1455     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1456     for (i=0; i<m; i++) {
1457       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1458       row++;
1459       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1460     }
1461     aj = Aloc->j;
1462     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1463 
1464     /* copy over the B part */
1465     Aloc = (Mat_SeqAIJ*)aij->B->data;
1466     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1467     row  = mat->rmap->rstart;
1468     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1469     ct   = cols;
1470     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1471     for (i=0; i<m; i++) {
1472       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1473       row++;
1474       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1475     }
1476     ierr = PetscFree(ct);CHKERRQ(ierr);
1477     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1478     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1479     /*
1480        Every process must participate in this call, since the graphics waits are
1481        synchronized across all processes that share the PetscDraw object
1482     */
1483     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1484     if (!rank) {
1485       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1486       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1487       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1488       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1489     }
1490     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1491     ierr = MatDestroy(&A);CHKERRQ(ierr);
1492   }
1493   PetscFunctionReturn(0);
1494 }
1495 
1496 #undef __FUNCT__
1497 #define __FUNCT__ "MatView_MPIAIJ"
1498 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1499 {
1500   PetscErrorCode ierr;
1501   PetscBool      iascii,isdraw,issocket,isbinary;
1502 
1503   PetscFunctionBegin;
1504   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1505   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1506   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1507   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1508   if (iascii || isdraw || isbinary || issocket) {
1509     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1510   }
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatSOR_MPIAIJ"
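/*
   Illustrative usage sketch (an assumption about a typical caller, not taken from this file):

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   For MATMPIAIJ this dispatches to the routine below, which applies SOR to the local diagonal
   block and folds the off-diagonal contribution into the right-hand side (bb1 = bb - B*x)
   between sweeps, i.e. a processor-local SOR; a true parallel SOR is not supported.
*/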
1516 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1517 {
1518   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1519   PetscErrorCode ierr;
1520   Vec            bb1 = 0;
1521   PetscBool      hasop;
1522 
1523   PetscFunctionBegin;
1524   if (flag == SOR_APPLY_UPPER) {
1525     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1526     PetscFunctionReturn(0);
1527   }
1528 
1529   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1530     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1531   }
1532 
1533   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1534     if (flag & SOR_ZERO_INITIAL_GUESS) {
1535       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1536       its--;
1537     }
1538 
1539     while (its--) {
1540       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1541       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1542 
1543       /* update rhs: bb1 = bb - B*x */
1544       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1545       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1546 
1547       /* local sweep */
1548       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1549     }
1550   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1551     if (flag & SOR_ZERO_INITIAL_GUESS) {
1552       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1553       its--;
1554     }
1555     while (its--) {
1556       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1557       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1558 
1559       /* update rhs: bb1 = bb - B*x */
1560       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1561       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1562 
1563       /* local sweep */
1564       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1565     }
1566   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1567     if (flag & SOR_ZERO_INITIAL_GUESS) {
1568       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1569       its--;
1570     }
1571     while (its--) {
1572       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1573       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1574 
1575       /* update rhs: bb1 = bb - B*x */
1576       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1577       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1578 
1579       /* local sweep */
1580       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1581     }
1582   } else if (flag & SOR_EISENSTAT) {
1583     Vec xx1;
1584 
1585     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1586     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1587 
1588     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1589     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1590     if (!mat->diag) {
1591       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1592       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1593     }
1594     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1595     if (hasop) {
1596       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1597     } else {
1598       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1599     }
1600     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1601 
1602     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1603 
1604     /* local sweep */
1605     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1606     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1607     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1608   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1609 
1610   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1611   PetscFunctionReturn(0);
1612 }
1613 
1614 #undef __FUNCT__
1615 #define __FUNCT__ "MatPermute_MPIAIJ"
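/*
   Outline of the approach below: PetscSF reductions over the row and column layouts invert the
   permutations so each process learns where its rows and columns end up; the destinations of the
   off-diagonal (garray) columns are obtained with an SF broadcast; the per-row diagonal and
   off-diagonal nonzero counts are communicated the same way to preallocate the permuted matrix,
   which is then filled with MatSetValues().
*/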
1616 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1617 {
1618   Mat            aA,aB,Aperm;
1619   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1620   PetscScalar    *aa,*ba;
1621   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1622   PetscSF        rowsf,sf;
1623   IS             parcolp = NULL;
1624   PetscBool      done;
1625   PetscErrorCode ierr;
1626 
1627   PetscFunctionBegin;
1628   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1629   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1630   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1631   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1632 
1633   /* Invert row permutation to find out where my rows should go */
1634   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1635   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1636   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1637   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1638   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1639   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1640 
1641   /* Invert column permutation to find out where my columns should go */
1642   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1643   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1644   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1645   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1646   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1647   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1648   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1649 
1650   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1651   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1652   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1653 
1654   /* Find out where my gcols should go */
1655   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1656   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1657   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1658   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1659   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1660   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1661   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1662   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1663 
1664   ierr = PetscMalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1665   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1666   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1667   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1668   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1669   for (i=0; i<m; i++) {
1670     PetscInt row = rdest[i],rowner;
1671     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1672     for (j=ai[i]; j<ai[i+1]; j++) {
1673       PetscInt cowner,col = cdest[aj[j]];
1674       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1675       if (rowner == cowner) dnnz[i]++;
1676       else onnz[i]++;
1677     }
1678     for (j=bi[i]; j<bi[i+1]; j++) {
1679       PetscInt cowner,col = gcdest[bj[j]];
1680       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1681       if (rowner == cowner) dnnz[i]++;
1682       else onnz[i]++;
1683     }
1684   }
1685   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1686   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1687   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1688   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1689   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1690   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1691   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1692 
1693   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1694   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1695   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1696   for (i=0; i<m; i++) {
1697     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1698     PetscInt rowlen;
1699     rowlen = ai[i+1] - ai[i];
1700     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1701     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1702     rowlen = bi[i+1] - bi[i];
1703     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1704     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1705   }
1706   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1707   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1708   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1709   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1710   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1711   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1712   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1713   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1714   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1715   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1716   *B = Aperm;
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1722 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1723 {
1724   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1725   Mat            A    = mat->A,B = mat->B;
1726   PetscErrorCode ierr;
1727   PetscReal      isend[5],irecv[5];
1728 
1729   PetscFunctionBegin;
1730   info->block_size = 1.0;
1731   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1732 
1733   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1734   isend[3] = info->memory;  isend[4] = info->mallocs;
1735 
1736   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1737 
1738   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1739   isend[3] += info->memory;  isend[4] += info->mallocs;
1740   if (flag == MAT_LOCAL) {
1741     info->nz_used      = isend[0];
1742     info->nz_allocated = isend[1];
1743     info->nz_unneeded  = isend[2];
1744     info->memory       = isend[3];
1745     info->mallocs      = isend[4];
1746   } else if (flag == MAT_GLOBAL_MAX) {
1747     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1748 
1749     info->nz_used      = irecv[0];
1750     info->nz_allocated = irecv[1];
1751     info->nz_unneeded  = irecv[2];
1752     info->memory       = irecv[3];
1753     info->mallocs      = irecv[4];
1754   } else if (flag == MAT_GLOBAL_SUM) {
1755     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1756 
1757     info->nz_used      = irecv[0];
1758     info->nz_allocated = irecv[1];
1759     info->nz_unneeded  = irecv[2];
1760     info->memory       = irecv[3];
1761     info->mallocs      = irecv[4];
1762   }
1763   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1764   info->fill_ratio_needed = 0;
1765   info->factor_mallocs    = 0;
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 #undef __FUNCT__
1770 #define __FUNCT__ "MatSetOption_MPIAIJ"
1771 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1772 {
1773   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1774   PetscErrorCode ierr;
1775 
1776   PetscFunctionBegin;
1777   switch (op) {
1778   case MAT_NEW_NONZERO_LOCATIONS:
1779   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1780   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1781   case MAT_KEEP_NONZERO_PATTERN:
1782   case MAT_NEW_NONZERO_LOCATION_ERR:
1783   case MAT_USE_INODES:
1784   case MAT_IGNORE_ZERO_ENTRIES:
1785     MatCheckPreallocated(A,1);
1786     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1787     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1788     break;
1789   case MAT_ROW_ORIENTED:
1790     a->roworiented = flg;
1791 
1792     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1793     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1794     break;
1795   case MAT_NEW_DIAGONALS:
1796     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1797     break;
1798   case MAT_IGNORE_OFF_PROC_ENTRIES:
1799     a->donotstash = flg;
1800     break;
1801   case MAT_SPD:
1802     A->spd_set = PETSC_TRUE;
1803     A->spd     = flg;
1804     if (flg) {
1805       A->symmetric                  = PETSC_TRUE;
1806       A->structurally_symmetric     = PETSC_TRUE;
1807       A->symmetric_set              = PETSC_TRUE;
1808       A->structurally_symmetric_set = PETSC_TRUE;
1809     }
1810     break;
1811   case MAT_SYMMETRIC:
1812     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1813     break;
1814   case MAT_STRUCTURALLY_SYMMETRIC:
1815     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_HERMITIAN:
1818     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1819     break;
1820   case MAT_SYMMETRY_ETERNAL:
1821     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1822     break;
1823   default:
1824     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1825   }
1826   PetscFunctionReturn(0);
1827 }
1828 
1829 #undef __FUNCT__
1830 #define __FUNCT__ "MatGetRow_MPIAIJ"
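/*
   The routine below merges the requested row from the local diagonal block (A, columns shifted
   by cstart to global numbering) and the off-diagonal block (B, columns mapped through garray),
   returning global column indices in increasing order (assuming, as the code does, that the A and
   B parts are themselves sorted). Only locally owned rows may be requested.
*/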
1831 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1832 {
1833   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1834   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1835   PetscErrorCode ierr;
1836   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1837   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1838   PetscInt       *cmap,*idx_p;
1839 
1840   PetscFunctionBegin;
1841   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1842   mat->getrowactive = PETSC_TRUE;
1843 
1844   if (!mat->rowvalues && (idx || v)) {
1845     /*
1846         allocate enough space to hold information from the longest row.
1847     */
1848     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1849     PetscInt   max = 1,tmp;
1850     for (i=0; i<matin->rmap->n; i++) {
1851       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1852       if (max < tmp) max = tmp;
1853     }
1854     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1855   }
1856 
1857   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1858   lrow = row - rstart;
1859 
1860   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1861   if (!v)   {pvA = 0; pvB = 0;}
1862   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1863   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1864   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1865   nztot = nzA + nzB;
1866 
1867   cmap = mat->garray;
1868   if (v  || idx) {
1869     if (nztot) {
1870       /* Sort by increasing column numbers, assuming A and B already sorted */
1871       PetscInt imark = -1;
1872       if (v) {
1873         *v = v_p = mat->rowvalues;
1874         for (i=0; i<nzB; i++) {
1875           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1876           else break;
1877         }
1878         imark = i;
1879         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1880         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1881       }
1882       if (idx) {
1883         *idx = idx_p = mat->rowindices;
1884         if (imark > -1) {
1885           for (i=0; i<imark; i++) {
1886             idx_p[i] = cmap[cworkB[i]];
1887           }
1888         } else {
1889           for (i=0; i<nzB; i++) {
1890             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1891             else break;
1892           }
1893           imark = i;
1894         }
1895         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1896         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1897       }
1898     } else {
1899       if (idx) *idx = 0;
1900       if (v)   *v   = 0;
1901     }
1902   }
1903   *nz  = nztot;
1904   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1905   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 #undef __FUNCT__
1910 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1911 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1912 {
1913   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1914 
1915   PetscFunctionBegin;
1916   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1917   aij->getrowactive = PETSC_FALSE;
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatNorm_MPIAIJ"
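/* Supports NORM_FROBENIUS, NORM_1 (maximum column sum), and NORM_INFINITY (maximum row sum);
   the 2-norm is not available for this matrix type. */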
1923 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1924 {
1925   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1926   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1927   PetscErrorCode ierr;
1928   PetscInt       i,j,cstart = mat->cmap->rstart;
1929   PetscReal      sum = 0.0;
1930   MatScalar      *v;
1931 
1932   PetscFunctionBegin;
1933   if (aij->size == 1) {
1934     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1935   } else {
1936     if (type == NORM_FROBENIUS) {
1937       v = amat->a;
1938       for (i=0; i<amat->nz; i++) {
1939         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1940       }
1941       v = bmat->a;
1942       for (i=0; i<bmat->nz; i++) {
1943         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1944       }
1945       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1946       *norm = PetscSqrtReal(*norm);
1947     } else if (type == NORM_1) { /* max column norm */
1948       PetscReal *tmp,*tmp2;
1949       PetscInt  *jj,*garray = aij->garray;
1950       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1951       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1952       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1953       *norm = 0.0;
1954       v     = amat->a; jj = amat->j;
1955       for (j=0; j<amat->nz; j++) {
1956         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1957       }
1958       v = bmat->a; jj = bmat->j;
1959       for (j=0; j<bmat->nz; j++) {
1960         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1961       }
1962       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1963       for (j=0; j<mat->cmap->N; j++) {
1964         if (tmp2[j] > *norm) *norm = tmp2[j];
1965       }
1966       ierr = PetscFree(tmp);CHKERRQ(ierr);
1967       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1968     } else if (type == NORM_INFINITY) { /* max row norm */
1969       PetscReal ntemp = 0.0;
1970       for (j=0; j<aij->A->rmap->n; j++) {
1971         v   = amat->a + amat->i[j];
1972         sum = 0.0;
1973         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1974           sum += PetscAbsScalar(*v); v++;
1975         }
1976         v = bmat->a + bmat->i[j];
1977         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1978           sum += PetscAbsScalar(*v); v++;
1979         }
1980         if (sum > ntemp) ntemp = sum;
1981       }
1982       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1983     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1984   }
1985   PetscFunctionReturn(0);
1986 }
1987 
1988 #undef __FUNCT__
1989 #define __FUNCT__ "MatTranspose_MPIAIJ"
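/*
   Illustrative usage sketch (an assumption about a typical caller, not taken from this file):

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);

   For MATMPIAIJ this dispatches to the routine below, which preallocates the transpose from the
   column counts of the local diagonal and off-diagonal blocks (using a PetscSF reduction for the
   off-process columns) and fills it with MatSetValues().
*/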
1990 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1991 {
1992   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1993   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1994   PetscErrorCode ierr;
1995   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1996   PetscInt       cstart = A->cmap->rstart,ncol;
1997   Mat            B;
1998   MatScalar      *array;
1999 
2000   PetscFunctionBegin;
2001   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2002 
2003   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2004   ai = Aloc->i; aj = Aloc->j;
2005   bi = Bloc->i; bj = Bloc->j;
2006   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2007     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2008     PetscSFNode          *oloc;
2009     PETSC_UNUSED PetscSF sf;
2010 
2011     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2012     /* compute d_nnz for preallocation */
2013     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2014     for (i=0; i<ai[ma]; i++) {
2015       d_nnz[aj[i]]++;
2016       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2017     }
2018     /* compute local off-diagonal contributions */
2019     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2020     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2021     /* map those to global */
2022     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2023     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2024     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2025     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2026     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2027     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2028     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2029 
2030     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2031     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2032     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2033     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2034     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2035     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2036   } else {
2037     B    = *matout;
2038     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2039     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2040   }
2041 
2042   /* copy over the A part */
2043   array = Aloc->a;
2044   row   = A->rmap->rstart;
2045   for (i=0; i<ma; i++) {
2046     ncol = ai[i+1]-ai[i];
2047     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2048     row++;
2049     array += ncol; aj += ncol;
2050   }
2051   aj = Aloc->j;
2052   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
2053 
2054   /* copy over the B part */
2055   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2056   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2057   array = Bloc->a;
2058   row   = A->rmap->rstart;
2059   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2060   cols_tmp = cols;
2061   for (i=0; i<mb; i++) {
2062     ncol = bi[i+1]-bi[i];
2063     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2064     row++;
2065     array += ncol; cols_tmp += ncol;
2066   }
2067   ierr = PetscFree(cols);CHKERRQ(ierr);
2068 
2069   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2070   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2071   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2072     *matout = B;
2073   } else {
2074     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2075   }
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 #undef __FUNCT__
2080 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
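/*
   Computes mat = diag(ll) * mat * diag(rr), e.g. via MatDiagonalScale(mat,ll,rr). The scatter of
   rr into the ghosted vector lvec is started first so that right-scaling the off-diagonal block
   can overlap the communication with scaling the diagonal block.
*/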
2081 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2082 {
2083   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2084   Mat            a    = aij->A,b = aij->B;
2085   PetscErrorCode ierr;
2086   PetscInt       s1,s2,s3;
2087 
2088   PetscFunctionBegin;
2089   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2090   if (rr) {
2091     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2092     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2093     /* Overlap communication with computation. */
2094     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2095   }
2096   if (ll) {
2097     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2098     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2099     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2100   }
2101   /* scale  the diagonal block */
2102   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2103 
2104   if (rr) {
2105     /* Do a scatter end and then right scale the off-diagonal block */
2106     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2107     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2108   }
2109   PetscFunctionReturn(0);
2110 }
2111 
2112 #undef __FUNCT__
2113 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2114 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2115 {
2116   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2117   PetscErrorCode ierr;
2118 
2119   PetscFunctionBegin;
2120   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 #undef __FUNCT__
2125 #define __FUNCT__ "MatEqual_MPIAIJ"
2126 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2127 {
2128   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2129   Mat            a,b,c,d;
2130   PetscBool      flg;
2131   PetscErrorCode ierr;
2132 
2133   PetscFunctionBegin;
2134   a = matA->A; b = matA->B;
2135   c = matB->A; d = matB->B;
2136 
2137   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2138   if (flg) {
2139     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2140   }
2141   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 #undef __FUNCT__
2146 #define __FUNCT__ "MatCopy_MPIAIJ"
2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2148 {
2149   PetscErrorCode ierr;
2150   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2151   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2152 
2153   PetscFunctionBegin;
2154   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2155   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2156     /* Because of the column compression in the off-process part of the matrix a->B,
2157        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2158        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2159        could be provided by first uncompressing the a->B matrices and then copying the
2160        submatrices. */
2161     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2162   } else {
2163     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2164     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2165   }
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatSetUp_MPIAIJ"
2171 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2172 {
2173   PetscErrorCode ierr;
2174 
2175   PetscFunctionBegin;
2176   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 #undef __FUNCT__
2181 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2182 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
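/* For each row the result counts the union of the two (sorted) global column lists; e.g. if row i
   of X has global columns {1,4,7} and row i of Y has {2,4}, then nnz[i] = 4 for {1,2,4,7}. */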
2183 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2184 {
2185   PetscInt       i,m=Y->rmap->N;
2186   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2187   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2188   const PetscInt *xi = x->i,*yi = y->i;
2189 
2190   PetscFunctionBegin;
2191   /* Set the number of nonzeros in the new matrix */
2192   for (i=0; i<m; i++) {
2193     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2194     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2195     nnz[i] = 0;
2196     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2197       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2198       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2199       nnz[i]++;
2200     }
2201     for (; k<nzy; k++) nnz[i]++;
2202   }
2203   PetscFunctionReturn(0);
2204 }
2205 
2206 #undef __FUNCT__
2207 #define __FUNCT__ "MatAXPY_MPIAIJ"
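/*
   Illustrative usage sketch (an assumption about a typical caller, not taken from this file):

     ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

   which computes Y = Y + alpha*X. SAME_NONZERO_PATTERN uses a direct BLAS axpy on the stored
   values, SUBSET_NONZERO_PATTERN maps X's entries into Y, and DIFFERENT_NONZERO_PATTERN builds a
   new matrix with merged preallocation and replaces Y's header with it.
*/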
2208 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2209 {
2210   PetscErrorCode ierr;
2211   PetscInt       i;
2212   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2213   PetscBLASInt   bnz,one=1;
2214   Mat_SeqAIJ     *x,*y;
2215 
2216   PetscFunctionBegin;
2217   if (str == SAME_NONZERO_PATTERN) {
2218     PetscScalar alpha = a;
2219     x    = (Mat_SeqAIJ*)xx->A->data;
2220     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2221     y    = (Mat_SeqAIJ*)yy->A->data;
2222     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2223     x    = (Mat_SeqAIJ*)xx->B->data;
2224     y    = (Mat_SeqAIJ*)yy->B->data;
2225     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2226     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2227   } else if (str == SUBSET_NONZERO_PATTERN) {
2228     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2229 
2230     x = (Mat_SeqAIJ*)xx->B->data;
2231     y = (Mat_SeqAIJ*)yy->B->data;
2232     if (y->xtoy && y->XtoY != xx->B) {
2233       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2234       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2235     }
2236     if (!y->xtoy) { /* get xtoy */
2237       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2238       y->XtoY = xx->B;
2239       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2240     }
2241     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2242   } else {
2243     Mat      B;
2244     PetscInt *nnz_d,*nnz_o;
2245     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2246     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2247     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2248     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2249     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2250     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2251     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2252     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2253     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2254     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2255     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2256     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2257     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2258     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2259   }
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2264 
2265 #undef __FUNCT__
2266 #define __FUNCT__ "MatConjugate_MPIAIJ"
2267 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2268 {
2269 #if defined(PETSC_USE_COMPLEX)
2270   PetscErrorCode ierr;
2271   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2272 
2273   PetscFunctionBegin;
2274   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2275   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2276 #else
2277   PetscFunctionBegin;
2278 #endif
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 #undef __FUNCT__
2283 #define __FUNCT__ "MatRealPart_MPIAIJ"
2284 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2285 {
2286   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2287   PetscErrorCode ierr;
2288 
2289   PetscFunctionBegin;
2290   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2291   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 #undef __FUNCT__
2296 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2297 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2298 {
2299   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2300   PetscErrorCode ierr;
2301 
2302   PetscFunctionBegin;
2303   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2304   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2305   PetscFunctionReturn(0);
2306 }
2307 
2308 #if defined(PETSC_HAVE_PBGL)
2309 
2310 #include <boost/parallel/mpi/bsp_process_group.hpp>
2311 #include <boost/graph/distributed/ilu_default_graph.hpp>
2312 #include <boost/graph/distributed/ilu_0_block.hpp>
2313 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2314 #include <boost/graph/distributed/petsc/interface.hpp>
2315 #include <boost/multi_array.hpp>
2316 #include <boost/parallel/distributed_property_map.hpp>
2317 
2318 #undef __FUNCT__
2319 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2320 /*
2321   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2322 */
2323 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2324 {
2325   namespace petsc = boost::distributed::petsc;
2326 
2327   namespace graph_dist = boost::graph::distributed;
2328   using boost::graph::distributed::ilu_default::process_group_type;
2329   using boost::graph::ilu_permuted;
2330 
2331   PetscBool      row_identity, col_identity;
2332   PetscContainer c;
2333   PetscInt       m, n, M, N;
2334   PetscErrorCode ierr;
2335 
2336   PetscFunctionBegin;
2337   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2338   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2339   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2340   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2341 
2342   process_group_type pg;
2343   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2344   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2345   lgraph_type& level_graph = *lgraph_p;
2346   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2347 
2348   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2349   ilu_permuted(level_graph);
2350 
2351   /* put together the new matrix */
2352   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2353   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2354   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2355   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2356   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2357   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2358   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2359   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2360 
2361   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2362   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2363   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2364   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2365   PetscFunctionReturn(0);
2366 }
2367 
2368 #undef __FUNCT__
2369 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2370 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2371 {
2372   PetscFunctionBegin;
2373   PetscFunctionReturn(0);
2374 }
2375 
2376 #undef __FUNCT__
2377 #define __FUNCT__ "MatSolve_MPIAIJ"
2378 /*
2379   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2380 */
2381 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2382 {
2383   namespace graph_dist = boost::graph::distributed;
2384 
2385   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2386   lgraph_type    *lgraph_p;
2387   PetscContainer c;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2392   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2393   ierr = VecCopy(b, x);CHKERRQ(ierr);
2394 
2395   PetscScalar *array_x;
2396   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2397   PetscInt sx;
2398   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2399 
2400   PetscScalar *array_b;
2401   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2402   PetscInt sb;
2403   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2404 
2405   lgraph_type& level_graph = *lgraph_p;
2406   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2407 
2408   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2409   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2410   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2411 
2412   typedef boost::iterator_property_map<array_ref_type::iterator,
2413                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2414   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2415   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2416 
2417   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2418   PetscFunctionReturn(0);
2419 }
2420 #endif
2421 
2422 #undef __FUNCT__
2423 #define __FUNCT__ "MatDestroy_MatRedundant"
2424 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2425 {
2426   PetscErrorCode ierr;
2427   Mat_Redundant  *redund;
2428   PetscInt       i;
2429   PetscMPIInt    size;
2430 
2431   PetscFunctionBegin;
2432   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2433   if (size == 1) {
2434     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2435     redund = a->redundant;
2436   } else {
2437     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2438     redund = a->redundant;
2439   }
2440   if (redund){
2441     if (redund->matseq) { /* via MatGetSubMatrices()  */
2442       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2443       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2444       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2445       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2446     } else {
2447       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2448       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2449       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2450       for (i=0; i<redund->nrecvs; i++) {
2451         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2452         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2453       }
2454       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2455     }
2456 
2457     if (redund->psubcomm) {
2458       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2459     }
2460     ierr = redund->Destroy(A);CHKERRQ(ierr);
2461     ierr = PetscFree(redund);CHKERRQ(ierr);
2462   }
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 #undef __FUNCT__
2467 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2468 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2469 {
2470   PetscMPIInt    rank,size;
2471   MPI_Comm       comm;
2472   PetscErrorCode ierr;
2473   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2474   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2475   PetscInt       *rowrange = mat->rmap->range;
2476   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2477   Mat            A = aij->A,B=aij->B,C=*matredundant;
2478   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2479   PetscScalar    *sbuf_a;
2480   PetscInt       nzlocal=a->nz+b->nz;
2481   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2482   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2483   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2484   MatScalar      *aworkA,*aworkB;
2485   PetscScalar    *vals;
2486   PetscMPIInt    tag1,tag2,tag3,imdex;
2487   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2488   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2489   MPI_Status     recv_status,*send_status;
2490   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2491   PetscInt       **rbuf_j=NULL;
2492   PetscScalar    **rbuf_a=NULL;
2493   Mat_Redundant  *redund =NULL;
2494 
2495   PetscFunctionBegin;
2496   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2497   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2498   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2499   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2500   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2501 
2502   if (reuse == MAT_REUSE_MATRIX) {
2503     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2504     if (subsize == 1) {
2505       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2506       redund = c->redundant;
2507     } else {
2508       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2509       redund = c->redundant;
2510     }
2511     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2512 
2513     nsends    = redund->nsends;
2514     nrecvs    = redund->nrecvs;
2515     send_rank = redund->send_rank;
2516     recv_rank = redund->recv_rank;
2517     sbuf_nz   = redund->sbuf_nz;
2518     rbuf_nz   = redund->rbuf_nz;
2519     sbuf_j    = redund->sbuf_j;
2520     sbuf_a    = redund->sbuf_a;
2521     rbuf_j    = redund->rbuf_j;
2522     rbuf_a    = redund->rbuf_a;
2523   }
2524 
2525   if (reuse == MAT_INITIAL_MATRIX) {
2526     PetscInt    nleftover,np_subcomm;
2527 
2528     /* get the destination processors' id send_rank, nsends and nrecvs */
2529     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2530 
2531     np_subcomm = size/nsubcomm;
2532     nleftover  = size - nsubcomm*np_subcomm;
2533 
2534     /* the block of code below is specific to the INTERLACED layout */
2535     /* ------------------------------------------------*/
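    /* With the interlaced layout (a sketch of the intent, stated as an assumption): global rank r
       has subrank r/nsubcomm, so the nsubcomm ranks sharing a subrank form one group, one member
       per subcommunicator; each rank sends its local rows to, and receives rows from, the other
       members of its group so that every subcommunicator can assemble the full matrix. Leftover
       ranks (when size is not divisible by nsubcomm) are handled separately below. */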
2536     nsends = 0; nrecvs = 0;
2537     for (i=0; i<size; i++) {
2538       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2539         send_rank[nsends++] = i;
2540         recv_rank[nrecvs++] = i;
2541       }
2542     }
2543     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2544       i = size-nleftover-1;
2545       j = 0;
2546       while (j < nsubcomm - nleftover) {
2547         send_rank[nsends++] = i;
2548         i--; j++;
2549       }
2550     }
2551 
2552     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2553       for (i=0; i<nleftover; i++) {
2554         recv_rank[nrecvs++] = size-nleftover+i;
2555       }
2556     }
2557     /*----------------------------------------------*/
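    /* Illustrative sketch (assuming the interlaced layout gives global rank r the
       subcomm color r%nsubcomm and subrank r/nsubcomm, which the test above relies on):
       with size=6 and nsubcomm=2, subcomm 0 = {0,2,4} and subcomm 1 = {1,3,5}.
       Rank 0 (subrank 0) exchanges rows only with rank 1, the other process of
       subrank 0; likewise 2<->3 and 4<->5.  Afterwards each subcomm collectively
       holds every global row, with the process of subrank s owning the rows of
       global ranks nsubcomm*s .. nsubcomm*(s+1)-1 (cf. rstart_sub/rend_sub below). */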
2558 
2559     /* allocate sbuf_j, sbuf_a */
2560     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2561     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2562     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2563     /*
2564     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2565     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2566      */
2567   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2568 
2569   /* copy mat's local entries into the buffers */
2570   if (reuse == MAT_INITIAL_MATRIX) {
2571     rownz_max = 0;
2572     rptr      = sbuf_j;
2573     cols      = sbuf_j + rend-rstart + 1;
2574     vals      = sbuf_a;
2575     rptr[0]   = 0;
2576     for (i=0; i<rend-rstart; i++) {
2577       row    = i + rstart;
2578       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2579       ncols  = nzA + nzB;
2580       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2581       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2582       /* load the column indices for this row into cols */
2583       lwrite = 0;
2584       for (l=0; l<nzB; l++) {
2585         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2586           vals[lwrite]   = aworkB[l];
2587           cols[lwrite++] = ctmp;
2588         }
2589       }
2590       for (l=0; l<nzA; l++) {
2591         vals[lwrite]   = aworkA[l];
2592         cols[lwrite++] = cstart + cworkA[l];
2593       }
2594       for (l=0; l<nzB; l++) {
2595         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2596           vals[lwrite]   = aworkB[l];
2597           cols[lwrite++] = ctmp;
2598         }
2599       }
2600       vals     += ncols;
2601       cols     += ncols;
2602       rptr[i+1] = rptr[i] + ncols;
2603       if (rownz_max < ncols) rownz_max = ncols;
2604     }
2605     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2606   } else { /* only copy matrix values into sbuf_a */
2607     rptr    = sbuf_j;
2608     vals    = sbuf_a;
2609     rptr[0] = 0;
2610     for (i=0; i<rend-rstart; i++) {
2611       row    = i + rstart;
2612       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2613       ncols  = nzA + nzB;
2614       cworkB = b->j + b->i[i];
2615       aworkA = a->a + a->i[i];
2616       aworkB = b->a + b->i[i];
2617       lwrite = 0;
2618       for (l=0; l<nzB; l++) {
2619         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2620       }
2621       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2622       for (l=0; l<nzB; l++) {
2623         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2624       }
2625       vals     += ncols;
2626       rptr[i+1] = rptr[i] + ncols;
2627     }
2628   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2629 
2630   /* send nzlocal to others, and recv others' nzlocal */
2631   /*--------------------------------------------------*/
2632   if (reuse == MAT_INITIAL_MATRIX) {
2633     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2634 
2635     s_waits2 = s_waits3 + nsends;
2636     s_waits1 = s_waits2 + nsends;
2637     r_waits1 = s_waits1 + nsends;
2638     r_waits2 = r_waits1 + nrecvs;
2639     r_waits3 = r_waits2 + nrecvs;
2640   } else {
2641     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2642 
2643     r_waits3 = s_waits3 + nsends;
2644   }
2645 
2646   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2647   if (reuse == MAT_INITIAL_MATRIX) {
2648     /* get new tags to keep the communication clean */
2649     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2650     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2651     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2652 
2653     /* post receives of other's nzlocal */
2654     for (i=0; i<nrecvs; i++) {
2655       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2656     }
2657     /* send nzlocal to others */
2658     for (i=0; i<nsends; i++) {
2659       sbuf_nz[i] = nzlocal;
2660       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2661     }
2662     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2663     count = nrecvs;
2664     while (count) {
2665       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2666 
2667       recv_rank[imdex] = recv_status.MPI_SOURCE;
2668       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2669       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2670 
2671       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2672 
2673       rbuf_nz[imdex] += i + 2;
2674 
2675       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2676       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2677       count--;
2678     }
2679     /* wait on sends of nzlocal */
2680     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2681     /* send mat->i,j to others, and recv others' mat->i,j */
2682     /*------------------------------------------------*/
2683     for (i=0; i<nsends; i++) {
2684       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2685       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2686     }
2687     /* wait on receives of mat->i,j */
2688     /*------------------------------*/
2689     count = nrecvs;
2690     while (count) {
2691       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2692       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2693       count--;
2694     }
2695     /* wait on sends of mat->i,j */
2696     /*---------------------------*/
2697     if (nsends) {
2698       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2699     }
2700   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2701 
2702   /* post receives, send and receive mat->a */
2703   /*----------------------------------------*/
2704   for (imdex=0; imdex<nrecvs; imdex++) {
2705     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2706   }
2707   for (i=0; i<nsends; i++) {
2708     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2709   }
2710   count = nrecvs;
2711   while (count) {
2712     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2713     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2714     count--;
2715   }
2716   if (nsends) {
2717     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2718   }
2719 
2720   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2721 
2722   /* create redundant matrix */
2723   /*-------------------------*/
2724   if (reuse == MAT_INITIAL_MATRIX) {
2725     const PetscInt *range;
2726     PetscInt       rstart_sub,rend_sub,mloc_sub;
2727 
2728     /* compute rownz_max for preallocation */
2729     for (imdex=0; imdex<nrecvs; imdex++) {
2730       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2731       rptr = rbuf_j[imdex];
2732       for (i=0; i<j; i++) {
2733         ncols = rptr[i+1] - rptr[i];
2734         if (rownz_max < ncols) rownz_max = ncols;
2735       }
2736     }
2737 
2738     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2739 
2740     /* get local size of redundant matrix
2741        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types but may not be efficient! */
2742     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2743     rstart_sub = range[nsubcomm*subrank];
2744     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2745       rend_sub = range[nsubcomm*(subrank+1)];
2746     } else {
2747       rend_sub = mat->rmap->N;
2748     }
2749     mloc_sub = rend_sub - rstart_sub;
2750 
2751     if (M == N) {
2752       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2753     } else { /* non-square matrix */
2754       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2755     }
2756     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2757     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2758     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2759     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2760   } else {
2761     C = *matredundant;
2762   }
2763 
2764   /* insert local matrix entries */
2765   rptr = sbuf_j;
2766   cols = sbuf_j + rend-rstart + 1;
2767   vals = sbuf_a;
2768   for (i=0; i<rend-rstart; i++) {
2769     row   = i + rstart;
2770     ncols = rptr[i+1] - rptr[i];
2771     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2772     vals += ncols;
2773     cols += ncols;
2774   }
2775   /* insert received matrix entries */
2776   for (imdex=0; imdex<nrecvs; imdex++) {
2777     rstart = rowrange[recv_rank[imdex]];
2778     rend   = rowrange[recv_rank[imdex]+1];
2779     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2780     rptr   = rbuf_j[imdex];
2781     cols   = rbuf_j[imdex] + rend-rstart + 1;
2782     vals   = rbuf_a[imdex];
2783     for (i=0; i<rend-rstart; i++) {
2784       row   = i + rstart;
2785       ncols = rptr[i+1] - rptr[i];
2786       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2787       vals += ncols;
2788       cols += ncols;
2789     }
2790   }
2791   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2792   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2793 
2794   if (reuse == MAT_INITIAL_MATRIX) {
2795     *matredundant = C;
2796 
2797     /* create a supporting struct and attach it to C for reuse */
2798     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2799     if (subsize == 1) {
2800       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2801       c->redundant = redund;
2802     } else {
2803       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2804       c->redundant = redund;
2805     }
2806 
2807     redund->nzlocal   = nzlocal;
2808     redund->nsends    = nsends;
2809     redund->nrecvs    = nrecvs;
2810     redund->send_rank = send_rank;
2811     redund->recv_rank = recv_rank;
2812     redund->sbuf_nz   = sbuf_nz;
2813     redund->rbuf_nz   = rbuf_nz;
2814     redund->sbuf_j    = sbuf_j;
2815     redund->sbuf_a    = sbuf_a;
2816     redund->rbuf_j    = rbuf_j;
2817     redund->rbuf_a    = rbuf_a;
2818     redund->psubcomm  = NULL;
2819 
2820     redund->Destroy = C->ops->destroy;
2821     C->ops->destroy = MatDestroy_MatRedundant;
2822   }
2823   PetscFunctionReturn(0);
2824 }
2825 
2826 #undef __FUNCT__
2827 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2828 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2829 {
2830   PetscErrorCode ierr;
2831   MPI_Comm       comm;
2832   PetscMPIInt    size,subsize;
2833   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2834   Mat_Redundant  *redund=NULL;
2835   PetscSubcomm   psubcomm=NULL;
2836   MPI_Comm       subcomm_in=subcomm;
2837   Mat            *matseq;
2838   IS             isrow,iscol;
2839 
2840   PetscFunctionBegin;
2841   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2842     if (reuse ==  MAT_INITIAL_MATRIX) {
2843       /* create psubcomm, then get subcomm */
2844       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2845       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2846       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2847 
2848       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2849       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2850       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2851       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2852       subcomm = psubcomm->comm;
2853     } else { /* retrieve psubcomm and subcomm */
2854       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2855       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2856       if (subsize == 1) {
2857         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2858         redund = c->redundant;
2859       } else {
2860         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2861         redund = c->redundant;
2862       }
2863       psubcomm = redund->psubcomm;
2864     }
2865     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2866       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2867       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2868         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2869         if (subsize == 1) {
2870           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2871           c->redundant->psubcomm = psubcomm;
2872         } else {
2873           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2874           c->redundant->psubcomm = psubcomm;
2875         }
2876       }
2877       PetscFunctionReturn(0);
2878     }
2879   }
2880 
2881   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2882   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2883   if (reuse == MAT_INITIAL_MATRIX) {
2884     /* create a local sequential matrix matseq[0] */
2885     mloc_sub = PETSC_DECIDE;
2886     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2887     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2888     rstart = rend - mloc_sub;
2889     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2890     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2891   } else { /* reuse == MAT_REUSE_MATRIX */
2892     if (subsize == 1) {
2893       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2894       redund = c->redundant;
2895     } else {
2896       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2897       redund = c->redundant;
2898     }
2899 
2900     isrow  = redund->isrow;
2901     iscol  = redund->iscol;
2902     matseq = redund->matseq;
2903   }
2904   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2905   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2906 
2907   if (reuse == MAT_INITIAL_MATRIX) {
2908     /* create a supporting struct and attach it to *matredundant for reuse */
2909     ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr);
2910     if (subsize == 1) {
2911       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2912       c->redundant = redund;
2913     } else {
2914       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2915       c->redundant = redund;
2916     }
2917     redund->isrow    = isrow;
2918     redund->iscol    = iscol;
2919     redund->matseq   = matseq;
2920     redund->psubcomm = psubcomm;
2921     redund->Destroy               = (*matredundant)->ops->destroy;
2922     (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2923   }
2924   PetscFunctionReturn(0);
2925 }
2926 
2927 #undef __FUNCT__
2928 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2929 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2930 {
2931   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2932   PetscErrorCode ierr;
2933   PetscInt       i,*idxb = 0;
2934   PetscScalar    *va,*vb;
2935   Vec            vtmp;
2936 
2937   PetscFunctionBegin;
2938   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2939   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2940   if (idx) {
2941     for (i=0; i<A->rmap->n; i++) {
2942       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2943     }
2944   }
2945 
2946   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2947   if (idx) {
2948     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2949   }
2950   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2951   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2952 
2953   for (i=0; i<A->rmap->n; i++) {
2954     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2955       va[i] = vb[i];
2956       if (idx) idx[i] = a->garray[idxb[i]];
2957     }
2958   }
2959 
2960   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2961   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2962   ierr = PetscFree(idxb);CHKERRQ(ierr);
2963   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2964   PetscFunctionReturn(0);
2965 }
2966 
2967 #undef __FUNCT__
2968 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2969 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2970 {
2971   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2972   PetscErrorCode ierr;
2973   PetscInt       i,*idxb = 0;
2974   PetscScalar    *va,*vb;
2975   Vec            vtmp;
2976 
2977   PetscFunctionBegin;
2978   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2979   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2980   if (idx) {
2981     for (i=0; i<A->rmap->n; i++) {
2982       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2983     }
2984   }
2985 
2986   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2987   if (idx) {
2988     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2989   }
2990   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2991   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2992 
2993   for (i=0; i<A->rmap->n; i++) {
2994     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2995       va[i] = vb[i];
2996       if (idx) idx[i] = a->garray[idxb[i]];
2997     }
2998   }
2999 
3000   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3001   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3002   ierr = PetscFree(idxb);CHKERRQ(ierr);
3003   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3004   PetscFunctionReturn(0);
3005 }
3006 
3007 #undef __FUNCT__
3008 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
3009 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3010 {
3011   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3012   PetscInt       n      = A->rmap->n;
3013   PetscInt       cstart = A->cmap->rstart;
3014   PetscInt       *cmap  = mat->garray;
3015   PetscInt       *diagIdx, *offdiagIdx;
3016   Vec            diagV, offdiagV;
3017   PetscScalar    *a, *diagA, *offdiagA;
3018   PetscInt       r;
3019   PetscErrorCode ierr;
3020 
3021   PetscFunctionBegin;
3022   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
3023   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3024   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3025   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3026   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3027   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3028   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3029   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3030   for (r = 0; r < n; ++r) {
3031     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3032       a[r]   = diagA[r];
3033       idx[r] = cstart + diagIdx[r];
3034     } else {
3035       a[r]   = offdiagA[r];
3036       idx[r] = cmap[offdiagIdx[r]];
3037     }
3038   }
3039   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3040   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3041   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3042   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3043   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3044   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3045   PetscFunctionReturn(0);
3046 }
3047 
3048 #undef __FUNCT__
3049 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3050 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3051 {
3052   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3053   PetscInt       n      = A->rmap->n;
3054   PetscInt       cstart = A->cmap->rstart;
3055   PetscInt       *cmap  = mat->garray;
3056   PetscInt       *diagIdx, *offdiagIdx;
3057   Vec            diagV, offdiagV;
3058   PetscScalar    *a, *diagA, *offdiagA;
3059   PetscInt       r;
3060   PetscErrorCode ierr;
3061 
3062   PetscFunctionBegin;
3063   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
3064   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3065   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3066   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3067   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3068   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3069   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3070   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3071   for (r = 0; r < n; ++r) {
3072     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3073       a[r]   = diagA[r];
3074       idx[r] = cstart + diagIdx[r];
3075     } else {
3076       a[r]   = offdiagA[r];
3077       idx[r] = cmap[offdiagIdx[r]];
3078     }
3079   }
3080   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3081   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3082   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3083   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3084   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3085   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3086   PetscFunctionReturn(0);
3087 }
3088 
3089 #undef __FUNCT__
3090 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3091 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3092 {
3093   PetscErrorCode ierr;
3094   Mat            *dummy;
3095 
3096   PetscFunctionBegin;
3097   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3098   *newmat = *dummy;
3099   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3100   PetscFunctionReturn(0);
3101 }
3102 
3103 #undef __FUNCT__
3104 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3105 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3106 {
3107   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3108   PetscErrorCode ierr;
3109 
3110   PetscFunctionBegin;
3111   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3112   PetscFunctionReturn(0);
3113 }
3114 
3115 #undef __FUNCT__
3116 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3117 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3118 {
3119   PetscErrorCode ierr;
3120   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3121 
3122   PetscFunctionBegin;
3123   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3124   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3125   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3126   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3127   PetscFunctionReturn(0);
3128 }
3129 
3130 /* -------------------------------------------------------------------*/
3131 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3132                                        MatGetRow_MPIAIJ,
3133                                        MatRestoreRow_MPIAIJ,
3134                                        MatMult_MPIAIJ,
3135                                 /* 4*/ MatMultAdd_MPIAIJ,
3136                                        MatMultTranspose_MPIAIJ,
3137                                        MatMultTransposeAdd_MPIAIJ,
3138 #if defined(PETSC_HAVE_PBGL)
3139                                        MatSolve_MPIAIJ,
3140 #else
3141                                        0,
3142 #endif
3143                                        0,
3144                                        0,
3145                                 /*10*/ 0,
3146                                        0,
3147                                        0,
3148                                        MatSOR_MPIAIJ,
3149                                        MatTranspose_MPIAIJ,
3150                                 /*15*/ MatGetInfo_MPIAIJ,
3151                                        MatEqual_MPIAIJ,
3152                                        MatGetDiagonal_MPIAIJ,
3153                                        MatDiagonalScale_MPIAIJ,
3154                                        MatNorm_MPIAIJ,
3155                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3156                                        MatAssemblyEnd_MPIAIJ,
3157                                        MatSetOption_MPIAIJ,
3158                                        MatZeroEntries_MPIAIJ,
3159                                 /*24*/ MatZeroRows_MPIAIJ,
3160                                        0,
3161 #if defined(PETSC_HAVE_PBGL)
3162                                        0,
3163 #else
3164                                        0,
3165 #endif
3166                                        0,
3167                                        0,
3168                                 /*29*/ MatSetUp_MPIAIJ,
3169 #if defined(PETSC_HAVE_PBGL)
3170                                        0,
3171 #else
3172                                        0,
3173 #endif
3174                                        0,
3175                                        0,
3176                                        0,
3177                                 /*34*/ MatDuplicate_MPIAIJ,
3178                                        0,
3179                                        0,
3180                                        0,
3181                                        0,
3182                                 /*39*/ MatAXPY_MPIAIJ,
3183                                        MatGetSubMatrices_MPIAIJ,
3184                                        MatIncreaseOverlap_MPIAIJ,
3185                                        MatGetValues_MPIAIJ,
3186                                        MatCopy_MPIAIJ,
3187                                 /*44*/ MatGetRowMax_MPIAIJ,
3188                                        MatScale_MPIAIJ,
3189                                        0,
3190                                        0,
3191                                        MatZeroRowsColumns_MPIAIJ,
3192                                 /*49*/ MatSetRandom_MPIAIJ,
3193                                        0,
3194                                        0,
3195                                        0,
3196                                        0,
3197                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3198                                        0,
3199                                        MatSetUnfactored_MPIAIJ,
3200                                        MatPermute_MPIAIJ,
3201                                        0,
3202                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3203                                        MatDestroy_MPIAIJ,
3204                                        MatView_MPIAIJ,
3205                                        0,
3206                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3207                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3208                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3209                                        0,
3210                                        0,
3211                                        0,
3212                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3213                                        MatGetRowMinAbs_MPIAIJ,
3214                                        0,
3215                                        MatSetColoring_MPIAIJ,
3216                                        0,
3217                                        MatSetValuesAdifor_MPIAIJ,
3218                                 /*75*/ MatFDColoringApply_AIJ,
3219                                        0,
3220                                        0,
3221                                        0,
3222                                        MatFindZeroDiagonals_MPIAIJ,
3223                                 /*80*/ 0,
3224                                        0,
3225                                        0,
3226                                 /*83*/ MatLoad_MPIAIJ,
3227                                        0,
3228                                        0,
3229                                        0,
3230                                        0,
3231                                        0,
3232                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3233                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3234                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3235                                        MatPtAP_MPIAIJ_MPIAIJ,
3236                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3237                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3238                                        0,
3239                                        0,
3240                                        0,
3241                                        0,
3242                                 /*99*/ 0,
3243                                        0,
3244                                        0,
3245                                        MatConjugate_MPIAIJ,
3246                                        0,
3247                                 /*104*/MatSetValuesRow_MPIAIJ,
3248                                        MatRealPart_MPIAIJ,
3249                                        MatImaginaryPart_MPIAIJ,
3250                                        0,
3251                                        0,
3252                                 /*109*/0,
3253                                        MatGetRedundantMatrix_MPIAIJ,
3254                                        MatGetRowMin_MPIAIJ,
3255                                        0,
3256                                        0,
3257                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3258                                        0,
3259                                        0,
3260                                        0,
3261                                        0,
3262                                 /*119*/0,
3263                                        0,
3264                                        0,
3265                                        0,
3266                                        MatGetMultiProcBlock_MPIAIJ,
3267                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3268                                        MatGetColumnNorms_MPIAIJ,
3269                                        MatInvertBlockDiagonal_MPIAIJ,
3270                                        0,
3271                                        MatGetSubMatricesParallel_MPIAIJ,
3272                                 /*129*/0,
3273                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3274                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3275                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3276                                        0,
3277                                 /*134*/0,
3278                                        0,
3279                                        0,
3280                                        0,
3281                                        0,
3282                                 /*139*/0,
3283                                        0,
3284                                        0,
3285                                        MatFDColoringSetUp_MPIXAIJ
3286 };
3287 
3288 /* ----------------------------------------------------------------------------------------*/
3289 
3290 #undef __FUNCT__
3291 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3292 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3293 {
3294   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3295   PetscErrorCode ierr;
3296 
3297   PetscFunctionBegin;
3298   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3299   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3300   PetscFunctionReturn(0);
3301 }
3302 
3303 #undef __FUNCT__
3304 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3305 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3306 {
3307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3308   PetscErrorCode ierr;
3309 
3310   PetscFunctionBegin;
3311   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3312   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3313   PetscFunctionReturn(0);
3314 }
3315 
3316 #undef __FUNCT__
3317 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3318 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3319 {
3320   Mat_MPIAIJ     *b;
3321   PetscErrorCode ierr;
3322 
3323   PetscFunctionBegin;
3324   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3325   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3326   b = (Mat_MPIAIJ*)B->data;
3327 
3328   if (!B->preallocated) {
3329     /* Explicitly create 2 MATSEQAIJ matrices. */
3330     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3331     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3332     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3333     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3334     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3335     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3336     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3337     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3338     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3339     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3340   }
3341 
3342   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3343   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3344   B->preallocated = PETSC_TRUE;
3345   PetscFunctionReturn(0);
3346 }
3347 
3348 #undef __FUNCT__
3349 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3350 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3351 {
3352   Mat            mat;
3353   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3354   PetscErrorCode ierr;
3355 
3356   PetscFunctionBegin;
3357   *newmat = 0;
3358   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3359   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3360   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3361   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3362   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3363   a       = (Mat_MPIAIJ*)mat->data;
3364 
3365   mat->factortype   = matin->factortype;
3366   mat->rmap->bs     = matin->rmap->bs;
3367   mat->cmap->bs     = matin->cmap->bs;
3368   mat->assembled    = PETSC_TRUE;
3369   mat->insertmode   = NOT_SET_VALUES;
3370   mat->preallocated = PETSC_TRUE;
3371 
3372   a->size         = oldmat->size;
3373   a->rank         = oldmat->rank;
3374   a->donotstash   = oldmat->donotstash;
3375   a->roworiented  = oldmat->roworiented;
3376   a->rowindices   = 0;
3377   a->rowvalues    = 0;
3378   a->getrowactive = PETSC_FALSE;
3379 
3380   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3381   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3382 
3383   if (oldmat->colmap) {
3384 #if defined(PETSC_USE_CTABLE)
3385     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3386 #else
3387     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3388     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3389     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3390 #endif
3391   } else a->colmap = 0;
3392   if (oldmat->garray) {
3393     PetscInt len;
3394     len  = oldmat->B->cmap->n;
3395     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3396     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3397     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3398   } else a->garray = 0;
3399 
3400   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3401   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3402   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3403   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3404   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3405   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3406   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3407   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3408   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3409   *newmat = mat;
3410   PetscFunctionReturn(0);
3411 }
3412 
3413 
3414 
3415 #undef __FUNCT__
3416 #define __FUNCT__ "MatLoad_MPIAIJ"
3417 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3418 {
3419   PetscScalar    *vals,*svals;
3420   MPI_Comm       comm;
3421   PetscErrorCode ierr;
3422   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3423   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3424   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3425   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3426   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3427   int            fd;
3428   PetscInt       bs = 1;
3429 
3430   PetscFunctionBegin;
3431   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3432   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3433   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3434   if (!rank) {
3435     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3436     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3437     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3438   }
3439 
3440   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3441   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3442   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3443 
3444   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3445 
3446   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3447   M    = header[1]; N = header[2];
3448   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3449   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3450   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3451 
3452   /* If global sizes are set, check if they are consistent with that given in the file */
3453   if (sizesset) {
3454     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3455   }
3456   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3457   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3458 
3459   /* determine ownership of all (block) rows */
3460   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3461   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3462   else m = newMat->rmap->n; /* Set by user */
3463 
3464   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3465   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3466 
3467   /* First process needs enough room for process with most rows */
3468   if (!rank) {
3469     mmax = rowners[1];
3470     for (i=2; i<=size; i++) {
3471       mmax = PetscMax(mmax, rowners[i]);
3472     }
3473   } else mmax = -1;             /* unused, but compilers complain */
3474 
3475   rowners[0] = 0;
3476   for (i=2; i<=size; i++) {
3477     rowners[i] += rowners[i-1];
3478   }
3479   rstart = rowners[rank];
3480   rend   = rowners[rank+1];
3481 
3482   /* distribute row lengths to all processors */
3483   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3484   if (!rank) {
3485     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3486     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3487     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3488     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3489     for (j=0; j<m; j++) {
3490       procsnz[0] += ourlens[j];
3491     }
3492     for (i=1; i<size; i++) {
3493       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3494       /* calculate the number of nonzeros on each processor */
3495       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3496         procsnz[i] += rowlengths[j];
3497       }
3498       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3499     }
3500     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3501   } else {
3502     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3503   }
3504 
3505   if (!rank) {
3506     /* determine max buffer needed and allocate it */
3507     maxnz = 0;
3508     for (i=0; i<size; i++) {
3509       maxnz = PetscMax(maxnz,procsnz[i]);
3510     }
3511     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3512 
3513     /* read in my part of the matrix column indices  */
3514     nz   = procsnz[0];
3515     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3516     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3517 
3518     /* read in everyone else's and ship off */
3519     for (i=1; i<size; i++) {
3520       nz   = procsnz[i];
3521       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3522       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3523     }
3524     ierr = PetscFree(cols);CHKERRQ(ierr);
3525   } else {
3526     /* determine buffer space needed for message */
3527     nz = 0;
3528     for (i=0; i<m; i++) {
3529       nz += ourlens[i];
3530     }
3531     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3532 
3533     /* receive message of column indices*/
3534     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3535   }
3536 
3537   /* determine column ownership if matrix is not square */
3538   if (N != M) {
3539     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3540     else n = newMat->cmap->n;
3541     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3542     cstart = cend - n;
3543   } else {
3544     cstart = rstart;
3545     cend   = rend;
3546     n      = cend - cstart;
3547   }
3548 
3549   /* loop over local rows, determining number of off diagonal entries */
3550   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3551   jj   = 0;
3552   for (i=0; i<m; i++) {
3553     for (j=0; j<ourlens[i]; j++) {
3554       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3555       jj++;
3556     }
3557   }
3558 
3559   for (i=0; i<m; i++) {
3560     ourlens[i] -= offlens[i];
3561   }
3562   if (!sizesset) {
3563     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3564   }
3565 
3566   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3567 
3568   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3569 
3570   for (i=0; i<m; i++) {
3571     ourlens[i] += offlens[i];
3572   }
3573 
3574   if (!rank) {
3575     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3576 
3577     /* read in my part of the matrix numerical values  */
3578     nz   = procsnz[0];
3579     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3580 
3581     /* insert into matrix */
3582     jj      = rstart;
3583     smycols = mycols;
3584     svals   = vals;
3585     for (i=0; i<m; i++) {
3586       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3587       smycols += ourlens[i];
3588       svals   += ourlens[i];
3589       jj++;
3590     }
3591 
3592     /* read in other processors and ship out */
3593     for (i=1; i<size; i++) {
3594       nz   = procsnz[i];
3595       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3596       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3597     }
3598     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3599   } else {
3600     /* receive numeric values */
3601     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3602 
3603     /* receive message of values*/
3604     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3605 
3606     /* insert into matrix */
3607     jj      = rstart;
3608     smycols = mycols;
3609     svals   = vals;
3610     for (i=0; i<m; i++) {
3611       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3612       smycols += ourlens[i];
3613       svals   += ourlens[i];
3614       jj++;
3615     }
3616   }
3617   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3618   ierr = PetscFree(vals);CHKERRQ(ierr);
3619   ierr = PetscFree(mycols);CHKERRQ(ierr);
3620   ierr = PetscFree(rowners);CHKERRQ(ierr);
3621   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3622   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3623   PetscFunctionReturn(0);
3624 }
3625 
3626 #undef __FUNCT__
3627 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3628 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3629 {
3630   PetscErrorCode ierr;
3631   IS             iscol_local;
3632   PetscInt       csize;
3633 
3634   PetscFunctionBegin;
3635   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3636   if (call == MAT_REUSE_MATRIX) {
3637     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3638     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3639   } else {
3640     PetscInt cbs;
3641     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3642     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3643     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3644   }
3645   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3646   if (call == MAT_INITIAL_MATRIX) {
3647     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3648     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3649   }
3650   PetscFunctionReturn(0);
3651 }
3652 
3653 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3654 #undef __FUNCT__
3655 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3656 /*
3657     Not great since it makes two copies of the submatrix: first a SeqAIJ
3658   locally, and then the final result by concatenating the local matrices.
3659   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3660 
3661   Note: This requires a sequential iscol with all indices.
3662 */
3663 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3664 {
3665   PetscErrorCode ierr;
3666   PetscMPIInt    rank,size;
3667   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3668   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3669   PetscBool      allcolumns, colflag;
3670   Mat            M,Mreuse;
3671   MatScalar      *vwork,*aa;
3672   MPI_Comm       comm;
3673   Mat_SeqAIJ     *aij;
3674 
3675   PetscFunctionBegin;
3676   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3677   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3678   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3679 
3680   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3681   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3682   if (colflag && ncol == mat->cmap->N) {
3683     allcolumns = PETSC_TRUE;
3684   } else {
3685     allcolumns = PETSC_FALSE;
3686   }
3687   if (call ==  MAT_REUSE_MATRIX) {
3688     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3689     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3690     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3691   } else {
3692     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3693   }
3694 
3695   /*
3696       m - number of local rows
3697       n - number of columns (same on all processors)
3698       rstart - first row in new global matrix generated
3699   */
3700   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3701   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3702   if (call == MAT_INITIAL_MATRIX) {
3703     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3704     ii  = aij->i;
3705     jj  = aij->j;
3706 
3707     /*
3708         Determine the number of non-zeros in the diagonal and off-diagonal
3709         portions of the matrix in order to do correct preallocation
3710     */
3711 
3712     /* first get start and end of "diagonal" columns */
3713     if (csize == PETSC_DECIDE) {
3714       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3715       if (mglobal == n) { /* square matrix */
3716         nlocal = m;
3717       } else {
3718         nlocal = n/size + ((n % size) > rank);
3719       }
3720     } else {
3721       nlocal = csize;
3722     }
3723     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3724     rstart = rend - nlocal;
3725     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3726 
3727     /* next, compute all the lengths */
3728     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3729     olens = dlens + m;
3730     for (i=0; i<m; i++) {
3731       jend = ii[i+1] - ii[i];
3732       olen = 0;
3733       dlen = 0;
3734       for (j=0; j<jend; j++) {
3735         if (*jj < rstart || *jj >= rend) olen++;
3736         else dlen++;
3737         jj++;
3738       }
3739       olens[i] = olen;
3740       dlens[i] = dlen;
3741     }
3742     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3743     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3744     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3745     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3746     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3747     ierr = PetscFree(dlens);CHKERRQ(ierr);
3748   } else {
3749     PetscInt ml,nl;
3750 
3751     M    = *newmat;
3752     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3753     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3754     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3755     /*
3756          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3757        rather than the slower MatSetValues().
3758     */
3759     M->was_assembled = PETSC_TRUE;
3760     M->assembled     = PETSC_FALSE;
3761   }
3762   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3763   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3764   ii   = aij->i;
3765   jj   = aij->j;
3766   aa   = aij->a;
3767   for (i=0; i<m; i++) {
3768     row   = rstart + i;
3769     nz    = ii[i+1] - ii[i];
3770     cwork = jj;     jj += nz;
3771     vwork = aa;     aa += nz;
3772     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3773   }
3774 
3775   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3776   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3777   *newmat = M;
3778 
3779   /* save submatrix used in processor for next request */
3780   if (call ==  MAT_INITIAL_MATRIX) {
3781     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3782     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3783   }
3784   PetscFunctionReturn(0);
3785 }
3786 
3787 #undef __FUNCT__
3788 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3789 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3790 {
3791   PetscInt       m,cstart, cend,j,nnz,i,d;
3792   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3793   const PetscInt *JJ;
3794   PetscScalar    *values;
3795   PetscErrorCode ierr;
3796 
3797   PetscFunctionBegin;
3798   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3799 
3800   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3801   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3802   m      = B->rmap->n;
3803   cstart = B->cmap->rstart;
3804   cend   = B->cmap->rend;
3805   rstart = B->rmap->rstart;
3806 
3807   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3808 
3809 #if defined(PETSC_USE_DEBUG)
3810   for (i=0; i<m; i++) {
3811     nnz = Ii[i+1]- Ii[i];
3812     JJ  = J + Ii[i];
3813     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3814     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3815     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3816   }
3817 #endif
3818 
3819   for (i=0; i<m; i++) {
3820     nnz     = Ii[i+1]- Ii[i];
3821     JJ      = J + Ii[i];
3822     nnz_max = PetscMax(nnz_max,nnz);
3823     d       = 0;
3824     for (j=0; j<nnz; j++) {
3825       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3826     }
3827     d_nnz[i] = d;
3828     o_nnz[i] = nnz - d;
3829   }
3830   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3831   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3832 
3833   if (v) values = (PetscScalar*)v;
3834   else {
3835     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3836     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3837   }
3838 
3839   for (i=0; i<m; i++) {
3840     ii   = i + rstart;
3841     nnz  = Ii[i+1]- Ii[i];
3842     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3843   }
3844   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3845   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3846 
3847   if (!v) {
3848     ierr = PetscFree(values);CHKERRQ(ierr);
3849   }
3850   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3851   PetscFunctionReturn(0);
3852 }
3853 
3854 #undef __FUNCT__
3855 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3856 /*@
3857    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3858    (the default parallel PETSc format).
3859 
3860    Collective on MPI_Comm
3861 
3862    Input Parameters:
3863 +  B - the matrix
3864 .  i - the indices into j for the start of each local row (starts with zero)
3865 .  j - the column indices for each local row (starts with zero)
3866 -  v - optional values in the matrix
3867 
3868    Level: developer
3869 
3870    Notes:
3871        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3872      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3873      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3874 
3875        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3876 
3877        The format which is used for the sparse matrix input is equivalent to a
3878     row-major ordering, i.e., for the following matrix, the input data expected is
3879     as shown:
3880 
3881         1 0 0
3882         2 0 3     P0
3883        -------
3884         4 5 6     P1
3885 
3886      Process0 [P0]: rows_owned=[0,1]
3887         i =  {0,1,3}  [size = nrow+1  = 2+1]
3888         j =  {0,0,2}  [size = nz = 3]
3889         v =  {1,2,3}  [size = nz = 3]
3890 
3891      Process1 [P1]: rows_owned=[2]
3892         i =  {0,3}    [size = nrow+1  = 1+1]
3893         j =  {0,1,2}  [size = nz = 3]
3894         v =  {4,5,6}  [size = nz = 3]
3895 
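     Example usage (a minimal sketch, not taken from a PETSc example): the calls below
     reproduce the two-process layout above. B, rank, and ierr are assumed to have been
     set up beforehand, with B created via MatCreate()/MatSetType(B,MATMPIAIJ) and given
     2 local rows on process 0 and 1 local row on process 1.

.vb
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};   /* process 0 owns rows 0 and 1 */
     PetscScalar v0[] = {1,2,3};
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};   /* process 1 owns row 2 */
     PetscScalar v1[] = {4,5,6};

     if (!rank) {ierr = MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);CHKERRQ(ierr);}
     else       {ierr = MatMPIAIJSetPreallocationCSR(B,i1,j1,v1);CHKERRQ(ierr);}
.ve
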
3896 .keywords: matrix, aij, compressed row, sparse, parallel
3897 
3898 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3899           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3900 @*/
3901 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3902 {
3903   PetscErrorCode ierr;
3904 
3905   PetscFunctionBegin;
3906   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3907   PetscFunctionReturn(0);
3908 }
3909 
3910 #undef __FUNCT__
3911 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3912 /*@C
3913    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3914    (the default parallel PETSc format).  For good matrix assembly performance
3915    the user should preallocate the matrix storage by setting the parameters
3916    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3917    performance can be increased by more than a factor of 50.
3918 
3919    Collective on MPI_Comm
3920 
3921    Input Parameters:
3922 +  B - the matrix
3923 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3924            (same value is used for all local rows)
3925 .  d_nnz - array containing the number of nonzeros in the various rows of the
3926            DIAGONAL portion of the local submatrix (possibly different for each row)
3927            or NULL, if d_nz is used to specify the nonzero structure.
3928            The size of this array is equal to the number of local rows, i.e 'm'.
3929            For matrices that will be factored, you must leave room for (and set)
3930            the diagonal entry even if it is zero.
3931 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3932            submatrix (same value is used for all local rows).
3933 -  o_nnz - array containing the number of nonzeros in the various rows of the
3934            OFF-DIAGONAL portion of the local submatrix (possibly different for
3935            each row) or NULL, if o_nz is used to specify the nonzero
3936            structure. The size of this array is equal to the number
3937            of local rows, i.e 'm'.
3938 
3939    If the *_nnz parameter is given then the *_nz parameter is ignored
3940 
3941    The AIJ format (also called the Yale sparse matrix format or
3942    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3943    storage.  The stored row and column indices begin with zero.
3944    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3945 
3946    The parallel matrix is partitioned such that the first m0 rows belong to
3947    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3948    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3949 
3950    The DIAGONAL portion of the local submatrix of a processor can be defined
3951    as the submatrix which is obtained by extracting the part corresponding to
3952    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3953    first row that belongs to the processor, r2 is the last row belonging to
3954    this processor, and c1-c2 is the range of indices of the local part of a
3955    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3956    common case of a square matrix, the row and column ranges are the same and
3957    the DIAGONAL part is also square. The remaining portion of the local
3958    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3959 
3960    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3961 
3962    You can call MatGetInfo() to get information on how effective the preallocation was;
3963    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3964    You can also run with the option -info and look for messages with the string
3965    malloc in them to see if additional memory allocation was needed.
3966 
3967    Example usage:
3968 
3969    Consider the following 8x8 matrix with 34 non-zero values, that is
3970    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3971    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3972    as follows:
3973 
3974 .vb
3975             1  2  0  |  0  3  0  |  0  4
3976     Proc0   0  5  6  |  7  0  0  |  8  0
3977             9  0 10  | 11  0  0  | 12  0
3978     -------------------------------------
3979            13  0 14  | 15 16 17  |  0  0
3980     Proc1   0 18  0  | 19 20 21  |  0  0
3981             0  0  0  | 22 23  0  | 24  0
3982     -------------------------------------
3983     Proc2  25 26 27  |  0  0 28  | 29  0
3984            30  0  0  | 31 32 33  |  0 34
3985 .ve
3986 
3987    This can be represented as a collection of submatrices as:
3988 
3989 .vb
3990       A B C
3991       D E F
3992       G H I
3993 .ve
3994 
3995    Where the submatrices A,B,C are owned by proc0, D,E,F are
3996    owned by proc1, G,H,I are owned by proc2.
3997 
3998    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3999    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4000    The 'M','N' parameters are 8,8, and have the same values on all procs.
4001 
4002    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4003    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4004    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4005    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4006    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4007    matrix, and [DF] as another SeqAIJ matrix.
4008 
4009    When d_nz, o_nz parameters are specified, d_nz storage elements are
4010    allocated for every row of the local diagonal submatrix, and o_nz
4011    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4012    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4013    local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4014    In this case, the values of d_nz,o_nz are:
4015 .vb
4016      proc0 : dnz = 2, o_nz = 2
4017      proc1 : dnz = 3, o_nz = 2
4018      proc2 : dnz = 1, o_nz = 4
4019 .ve
4020    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4021    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4022    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4023    34 values.
4024 
4025    When d_nnz, o_nnz parameters are specified, the storage is specified
4026    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4027    In the above case the values for d_nnz,o_nnz are:
4028 .vb
4029      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4030      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4031      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4032 .ve
4033    Here the space allocated is the sum of all the above values, i.e., 34, and
4034    hence the preallocation is perfect.
4035 
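   As a minimal sketch (not from a PETSc example): on the process playing the role of
   proc0 above, with B already created as a MATMPIAIJ with 3 local rows and ierr
   declared, the per-row preallocation call would be

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* proc0 values from the table above */
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
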
4036    Level: intermediate
4037 
4038 .keywords: matrix, aij, compressed row, sparse, parallel
4039 
4040 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4041           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4042 @*/
4043 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4044 {
4045   PetscErrorCode ierr;
4046 
4047   PetscFunctionBegin;
4048   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4049   PetscValidType(B,1);
4050   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4051   PetscFunctionReturn(0);
4052 }
4053 
4054 #undef __FUNCT__
4055 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4056 /*@
4057      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4058          CSR format the local rows.
4059 
4060    Collective on MPI_Comm
4061 
4062    Input Parameters:
4063 +  comm - MPI communicator
4064 .  m - number of local rows (Cannot be PETSC_DECIDE)
4065 .  n - This value should be the same as the local size used in creating the
4066        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4067        calculated if N is given) For square matrices n is almost always m.
4068 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4069 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4070 .   i - row indices
4071 .   j - column indices
4072 -   a - matrix values
4073 
4074    Output Parameter:
4075 .   mat - the matrix
4076 
4077    Level: intermediate
4078 
4079    Notes:
4080        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4081      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4082      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4083 
4084        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4085 
4086        The format which is used for the sparse matrix input is equivalent to a
4087     row-major ordering, i.e., for the following matrix, the input data expected is
4088     as shown:
4089 
4090         1 0 0
4091         2 0 3     P0
4092        -------
4093         4 5 6     P1
4094 
4095      Process0 [P0]: rows_owned=[0,1]
4096         i =  {0,1,3}  [size = nrow+1  = 2+1]
4097         j =  {0,0,2}  [size = nz = 3]
4098         v =  {1,2,3}  [size = nz = 3]
4099 
4100      Process1 [P1]: rows_owned=[2]
4101         i =  {0,3}    [size = nrow+1  = 1+1]
4102         j =  {0,1,2}  [size = nz = 3]
4103         v =  {4,5,6}  [size = nz = 3]
4104 
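     A minimal creation sketch for the same two-process layout (not from a PETSc
     example; rank and ierr are assumed to be declared, and n is left as PETSC_DECIDE
     since N is given):

.vb
     Mat         A;
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};   /* process 0: rows 0 and 1 */
     PetscScalar v0[] = {1,2,3};
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};   /* process 1: row 2 */
     PetscScalar v1[] = {4,5,6};

     if (!rank) {ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i0,j0,v0,&A);CHKERRQ(ierr);}
     else       {ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,i1,j1,v1,&A);CHKERRQ(ierr);}
.ve
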
4105 .keywords: matrix, aij, compressed row, sparse, parallel
4106 
4107 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4108           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4109 @*/
4110 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4111 {
4112   PetscErrorCode ierr;
4113 
4114   PetscFunctionBegin;
4115   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4116   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4117   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4118   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4119   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4120   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4121   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4122   PetscFunctionReturn(0);
4123 }
4124 
4125 #undef __FUNCT__
4126 #define __FUNCT__ "MatCreateAIJ"
4127 /*@C
4128    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4129    (the default parallel PETSc format).  For good matrix assembly performance
4130    the user should preallocate the matrix storage by setting the parameters
4131    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4132    performance can be increased by more than a factor of 50.
4133 
4134    Collective on MPI_Comm
4135 
4136    Input Parameters:
4137 +  comm - MPI communicator
4138 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4139            This value should be the same as the local size used in creating the
4140            y vector for the matrix-vector product y = Ax.
4141 .  n - This value should be the same as the local size used in creating the
4142        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4143        calculated if N is given) For square matrices n is almost always m.
4144 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4145 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4146 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4147            (same value is used for all local rows)
4148 .  d_nnz - array containing the number of nonzeros in the various rows of the
4149            DIAGONAL portion of the local submatrix (possibly different for each row)
4150            or NULL, if d_nz is used to specify the nonzero structure.
4151            The size of this array is equal to the number of local rows, i.e 'm'.
4152 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4153            submatrix (same value is used for all local rows).
4154 -  o_nnz - array containing the number of nonzeros in the various rows of the
4155            OFF-DIAGONAL portion of the local submatrix (possibly different for
4156            each row) or NULL, if o_nz is used to specify the nonzero
4157            structure. The size of this array is equal to the number
4158            of local rows, i.e 'm'.
4159 
4160    Output Parameter:
4161 .  A - the matrix
4162 
4163    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4164    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4165    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4166 
4167    Notes:
4168    If the *_nnz parameter is given then the *_nz parameter is ignored
4169 
4170    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4171    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4172    storage requirements for this matrix.
4173 
4174    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4175    processor then it must be used on all processors that share the object for
4176    that argument.
4177 
4178    The user MUST specify either the local or global matrix dimensions
4179    (possibly both).
4180 
4181    The parallel matrix is partitioned across processors such that the
4182    first m0 rows belong to process 0, the next m1 rows belong to
4183    process 1, the next m2 rows belong to process 2, etc., where
4184    m0,m1,m2,.. are the input parameter 'm'. That is, each processor stores
4185    values corresponding to an [m x N] submatrix.
4186 
4187    The columns are logically partitioned with the n0 columns belonging
4188    to 0th partition, the next n1 columns belonging to the next
4189    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4190 
4191    The DIAGONAL portion of the local submatrix on any given processor
4192    is the submatrix formed by the rows and columns m,n owned by
4193    the given processor, i.e., the diagonal matrix on
4194    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4195    etc. The remaining portion of the local submatrix [m x (N-n)]
4196    constitutes the OFF-DIAGONAL portion. The example below better
4197    illustrates this concept.
4198 
4199    For a square global matrix we define each processor's diagonal portion
4200    to be its local rows and the corresponding columns (a square submatrix);
4201    each processor's off-diagonal portion encompasses the remainder of the
4202    local matrix (a rectangular submatrix).
4203 
4204    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4205 
4206    When calling this routine with a single process communicator, a matrix of
4207    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4208    type of communicator, use the construction mechanism:
4209      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4210 
4211    By default, this format uses inodes (identical nodes) when possible.
4212    We search for consecutive rows with the same nonzero structure, thereby
4213    reusing matrix information to achieve increased efficiency.
4214 
4215    Options Database Keys:
4216 +  -mat_no_inode  - Do not use inodes
4217 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4218 -  -mat_aij_oneindex - Internally use indexing starting at 1
4219         rather than 0.  Note that when calling MatSetValues(),
4220         the user still MUST index entries starting at 0!
4221 
4222 
4223    Example usage:
4224 
4225    Consider the following 8x8 matrix with 34 non-zero values, that is
4226    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4227    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4228    as follows:
4229 
4230 .vb
4231             1  2  0  |  0  3  0  |  0  4
4232     Proc0   0  5  6  |  7  0  0  |  8  0
4233             9  0 10  | 11  0  0  | 12  0
4234     -------------------------------------
4235            13  0 14  | 15 16 17  |  0  0
4236     Proc1   0 18  0  | 19 20 21  |  0  0
4237             0  0  0  | 22 23  0  | 24  0
4238     -------------------------------------
4239     Proc2  25 26 27  |  0  0 28  | 29  0
4240            30  0  0  | 31 32 33  |  0 34
4241 .ve
4242 
4243    This can be represented as a collection of submatrices as:
4244 
4245 .vb
4246       A B C
4247       D E F
4248       G H I
4249 .ve
4250 
4251    Where the submatrices A,B,C are owned by proc0, D,E,F are
4252    owned by proc1, G,H,I are owned by proc2.
4253 
4254    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4255    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4256    The 'M','N' parameters are 8,8, and have the same values on all procs.
4257 
4258    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4259    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4260    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4261    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4262    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4263    matrix, and [DF] as another SeqAIJ matrix.
4264 
4265    When d_nz, o_nz parameters are specified, d_nz storage elements are
4266    allocated for every row of the local diagonal submatrix, and o_nz
4267    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4268    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4269    local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4270    In this case, the values of d_nz,o_nz are:
4271 .vb
4272      proc0 : dnz = 2, o_nz = 2
4273      proc1 : dnz = 3, o_nz = 2
4274      proc2 : dnz = 1, o_nz = 4
4275 .ve
4276    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4277    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4278    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4279    34 values.
4280 
4281    When d_nnz, o_nnz parameters are specified, the storage is specified
4282    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4283    In the above case the values for d_nnz,o_nnz are:
4284 .vb
4285      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4286      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4287      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4288 .ve
4289    Here the space allocated is the sum of all the above values, i.e., 34, and
4290    hence the preallocation is perfect.
4291 
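   A minimal creation sketch for the rank playing the role of proc0 above (not from a
   PETSc example; the other ranks pass their own local sizes and nnz arrays, and ierr
   is assumed to be declared):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* proc0 values from the table above */
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
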
4292    Level: intermediate
4293 
4294 .keywords: matrix, aij, compressed row, sparse, parallel
4295 
4296 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4297           MPIAIJ, MatCreateMPIAIJWithArrays()
4298 @*/
4299 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4300 {
4301   PetscErrorCode ierr;
4302   PetscMPIInt    size;
4303 
4304   PetscFunctionBegin;
4305   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4306   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4307   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4308   if (size > 1) {
4309     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4310     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4311   } else {
4312     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4313     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4314   }
4315   PetscFunctionReturn(0);
4316 }
4317 
4318 #undef __FUNCT__
4319 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4320 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4321 {
4322   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4323 
4324   PetscFunctionBegin;
4325   *Ad     = a->A;
4326   *Ao     = a->B;
4327   *colmap = a->garray;
4328   PetscFunctionReturn(0);
4329 }
4330 
4331 #undef __FUNCT__
4332 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4333 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4334 {
4335   PetscErrorCode ierr;
4336   PetscInt       i;
4337   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4338 
4339   PetscFunctionBegin;
4340   if (coloring->ctype == IS_COLORING_GLOBAL) {
4341     ISColoringValue *allcolors,*colors;
4342     ISColoring      ocoloring;
4343 
4344     /* set coloring for diagonal portion */
4345     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4346 
4347     /* set coloring for off-diagonal portion */
4348     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4349     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4350     for (i=0; i<a->B->cmap->n; i++) {
4351       colors[i] = allcolors[a->garray[i]];
4352     }
4353     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4354     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4355     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4356     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4357   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4358     ISColoringValue *colors;
4359     PetscInt        *larray;
4360     ISColoring      ocoloring;
4361 
4362     /* set coloring for diagonal portion */
4363     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4364     for (i=0; i<a->A->cmap->n; i++) {
4365       larray[i] = i + A->cmap->rstart;
4366     }
4367     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4368     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4369     for (i=0; i<a->A->cmap->n; i++) {
4370       colors[i] = coloring->colors[larray[i]];
4371     }
4372     ierr = PetscFree(larray);CHKERRQ(ierr);
4373     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4374     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4375     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4376 
4377     /* set coloring for off-diagonal portion */
4378     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4379     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4380     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4381     for (i=0; i<a->B->cmap->n; i++) {
4382       colors[i] = coloring->colors[larray[i]];
4383     }
4384     ierr = PetscFree(larray);CHKERRQ(ierr);
4385     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4386     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4387     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4388   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4389   PetscFunctionReturn(0);
4390 }
4391 
4392 #undef __FUNCT__
4393 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4394 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4395 {
4396   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4397   PetscErrorCode ierr;
4398 
4399   PetscFunctionBegin;
4400   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4401   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4402   PetscFunctionReturn(0);
4403 }
4404 
4405 #undef __FUNCT__
4406 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4407 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4408 {
4409   PetscErrorCode ierr;
4410   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4411   PetscInt       *indx;
4412 
4413   PetscFunctionBegin;
4414   /* This routine will ONLY return MPIAIJ type matrix */
4415   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4416   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4417   if (n == PETSC_DECIDE) {
4418     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4419   }
4420   /* Check sum(n) = N */
4421   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4422   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
4423 
4424   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4425   rstart -= m;
4426 
4427   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4428   for (i=0; i<m; i++) {
4429     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4430     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4431     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4432   }
4433 
4434   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4435   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4436   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4437   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4438   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4439   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4440   PetscFunctionReturn(0);
4441 }
4442 
4443 #undef __FUNCT__
4444 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4445 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4446 {
4447   PetscErrorCode ierr;
4448   PetscInt       m,N,i,rstart,nnz,Ii;
4449   PetscInt       *indx;
4450   PetscScalar    *values;
4451 
4452   PetscFunctionBegin;
4453   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4454   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4455   for (i=0; i<m; i++) {
4456     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4457     Ii   = i + rstart;
4458     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4459     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4460   }
4461   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4462   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4463   PetscFunctionReturn(0);
4464 }
4465 
4466 #undef __FUNCT__
4467 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4468 /*@
4469       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4470                  matrices from each processor
4471 
4472     Collective on MPI_Comm
4473 
4474    Input Parameters:
4475 +    comm - the communicator the parallel matrix will live on
4476 .    inmat - the input sequential matrix held on each processor
4477 .    n - number of local columns (or PETSC_DECIDE)
4478 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4479 
4480    Output Parameter:
4481 .    outmat - the parallel matrix generated
4482 
4483     Level: advanced
4484 
4485    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4486 
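   A minimal sketch (not from a PETSc example; Aseq is an assumed SeqAIJ matrix held on
   each process, all with the same number of columns, and ierr is assumed to be declared):

.vb
     Mat C;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve
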
4487 @*/
4488 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4489 {
4490   PetscErrorCode ierr;
4491   PetscMPIInt    size;
4492 
4493   PetscFunctionBegin;
4494   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4495   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4496   if (size == 1) {
4497     if (scall == MAT_INITIAL_MATRIX) {
4498       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4499     } else {
4500       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4501     }
4502   } else {
4503     if (scall == MAT_INITIAL_MATRIX) {
4504       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4505     }
4506     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4507   }
4508   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4509   PetscFunctionReturn(0);
4510 }
4511 
4512 #undef __FUNCT__
4513 #define __FUNCT__ "MatFileSplit"
4514 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4515 {
4516   PetscErrorCode    ierr;
4517   PetscMPIInt       rank;
4518   PetscInt          m,N,i,rstart,nnz;
4519   size_t            len;
4520   const PetscInt    *indx;
4521   PetscViewer       out;
4522   char              *name;
4523   Mat               B;
4524   const PetscScalar *values;
4525 
4526   PetscFunctionBegin;
4527   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4528   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4529   /* Should this be the type of the diagonal block of A? */
4530   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4531   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4532   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4533   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4534   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4535   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4536   for (i=0; i<m; i++) {
4537     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4538     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4539     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4540   }
4541   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4542   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4543 
4544   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4545   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4546   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4547   sprintf(name,"%s.%d",outfile,rank);
4548   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4549   ierr = PetscFree(name);CHKERRQ(ierr);
4550   ierr = MatView(B,out);CHKERRQ(ierr);
4551   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4552   ierr = MatDestroy(&B);CHKERRQ(ierr);
4553   PetscFunctionReturn(0);
4554 }
4555 
4556 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4557 #undef __FUNCT__
4558 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4559 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4560 {
4561   PetscErrorCode      ierr;
4562   Mat_Merge_SeqsToMPI *merge;
4563   PetscContainer      container;
4564 
4565   PetscFunctionBegin;
4566   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4567   if (container) {
4568     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4569     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4570     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4571     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4572     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4573     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4574     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4575     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4576     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4577     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4578     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4579     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4580     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4581     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4582     ierr = PetscFree(merge);CHKERRQ(ierr);
4583     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4584   }
4585   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4586   PetscFunctionReturn(0);
4587 }
4588 
4589 #include <../src/mat/utils/freespace.h>
4590 #include <petscbt.h>
4591 
4592 #undef __FUNCT__
4593 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4594 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4595 {
4596   PetscErrorCode      ierr;
4597   MPI_Comm            comm;
4598   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4599   PetscMPIInt         size,rank,taga,*len_s;
4600   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4601   PetscInt            proc,m;
4602   PetscInt            **buf_ri,**buf_rj;
4603   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4604   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4605   MPI_Request         *s_waits,*r_waits;
4606   MPI_Status          *status;
4607   MatScalar           *aa=a->a;
4608   MatScalar           **abuf_r,*ba_i;
4609   Mat_Merge_SeqsToMPI *merge;
4610   PetscContainer      container;
4611 
4612   PetscFunctionBegin;
4613   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4614   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4615 
4616   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4617   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4618 
4619   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4620   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4621 
4622   bi     = merge->bi;
4623   bj     = merge->bj;
4624   buf_ri = merge->buf_ri;
4625   buf_rj = merge->buf_rj;
4626 
4627   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4628   owners = merge->rowmap->range;
4629   len_s  = merge->len_s;
4630 
4631   /* send and recv matrix values */
4632   /*-----------------------------*/
4633   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4634   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4635 
4636   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4637   for (proc=0,k=0; proc<size; proc++) {
4638     if (!len_s[proc]) continue;
4639     i    = owners[proc];
4640     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4641     k++;
4642   }
4643 
4644   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4645   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4646   ierr = PetscFree(status);CHKERRQ(ierr);
4647 
4648   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4649   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4650 
4651   /* insert mat values of mpimat */
4652   /*----------------------------*/
4653   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4654   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4655 
4656   for (k=0; k<merge->nrecv; k++) {
4657     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4658     nrows       = *(buf_ri_k[k]);
4659     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4660     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4661   }
4662 
4663   /* set values of ba */
4664   m = merge->rowmap->n;
4665   for (i=0; i<m; i++) {
4666     arow = owners[rank] + i;
4667     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4668     bnzi = bi[i+1] - bi[i];
4669     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4670 
4671     /* add local non-zero vals of this proc's seqmat into ba */
4672     anzi   = ai[arow+1] - ai[arow];
4673     aj     = a->j + ai[arow];
4674     aa     = a->a + ai[arow];
4675     nextaj = 0;
4676     for (j=0; nextaj<anzi; j++) {
4677       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4678         ba_i[j] += aa[nextaj++];
4679       }
4680     }
4681 
4682     /* add received vals into ba */
4683     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4684       /* i-th row */
4685       if (i == *nextrow[k]) {
4686         anzi   = *(nextai[k]+1) - *nextai[k];
4687         aj     = buf_rj[k] + *(nextai[k]);
4688         aa     = abuf_r[k] + *(nextai[k]);
4689         nextaj = 0;
4690         for (j=0; nextaj<anzi; j++) {
4691           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4692             ba_i[j] += aa[nextaj++];
4693           }
4694         }
4695         nextrow[k]++; nextai[k]++;
4696       }
4697     }
4698     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4699   }
4700   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4701   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4702 
4703   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4704   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4705   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4706   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4707   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4708   PetscFunctionReturn(0);
4709 }
4710 
4711 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4712 
4713 #undef __FUNCT__
4714 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4715 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4716 {
4717   PetscErrorCode      ierr;
4718   Mat                 B_mpi;
4719   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4720   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4721   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4722   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4723   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4724   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4725   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4726   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4727   MPI_Status          *status;
4728   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4729   PetscBT             lnkbt;
4730   Mat_Merge_SeqsToMPI *merge;
4731   PetscContainer      container;
4732 
4733   PetscFunctionBegin;
4734   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4735 
4736   /* make sure it is a PETSc comm */
4737   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4738   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4739   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4740 
4741   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4742   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4743 
4744   /* determine row ownership */
4745   /*---------------------------------------------------------*/
4746   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4747   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4748   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4749   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4750   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4751   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4752   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4753 
4754   m      = merge->rowmap->n;
4755   owners = merge->rowmap->range;
4756 
4757   /* determine the number of messages to send, their lengths */
4758   /*---------------------------------------------------------*/
4759   len_s = merge->len_s;
4760 
4761   len          = 0; /* length of buf_si[] */
4762   merge->nsend = 0;
4763   for (proc=0; proc<size; proc++) {
4764     len_si[proc] = 0;
4765     if (proc == rank) {
4766       len_s[proc] = 0;
4767     } else {
4768       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4769       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4770     }
4771     if (len_s[proc]) {
4772       merge->nsend++;
4773       nrows = 0;
4774       for (i=owners[proc]; i<owners[proc+1]; i++) {
4775         if (ai[i+1] > ai[i]) nrows++;
4776       }
4777       len_si[proc] = 2*(nrows+1);
4778       len         += len_si[proc];
4779     }
4780   }
4781 
4782   /* determine the number and length of messages to receive for ij-structure */
4783   /*-------------------------------------------------------------------------*/
4784   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4785   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4786 
4787   /* post the Irecv of j-structure */
4788   /*-------------------------------*/
4789   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4790   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4791 
4792   /* post the Isend of j-structure */
4793   /*--------------------------------*/
4794   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4795 
4796   for (proc=0, k=0; proc<size; proc++) {
4797     if (!len_s[proc]) continue;
4798     i    = owners[proc];
4799     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4800     k++;
4801   }
4802 
4803   /* receives and sends of j-structure are complete */
4804   /*------------------------------------------------*/
4805   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4806   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4807 
4808   /* send and recv i-structure */
4809   /*---------------------------*/
4810   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4811   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4812 
4813   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4814   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4815   for (proc=0,k=0; proc<size; proc++) {
4816     if (!len_s[proc]) continue;
4817     /* form outgoing message for i-structure:
4818          buf_si[0]:                 nrows to be sent
4819                [1:nrows]:           row index (global)
4820                [nrows+1:2*nrows+1]: i-structure index
4821     */
4822     /*-------------------------------------------*/
4823     nrows       = len_si[proc]/2 - 1;
4824     buf_si_i    = buf_si + nrows+1;
4825     buf_si[0]   = nrows;
4826     buf_si_i[0] = 0;
4827     nrows       = 0;
4828     for (i=owners[proc]; i<owners[proc+1]; i++) {
4829       anzi = ai[i+1] - ai[i];
4830       if (anzi) {
4831         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4832         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4833         nrows++;
4834       }
4835     }
4836     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4837     k++;
4838     buf_si += len_si[proc];
4839   }
4840 
4841   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4842   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4843 
4844   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4845   for (i=0; i<merge->nrecv; i++) {
4846     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4847   }
4848 
4849   ierr = PetscFree(len_si);CHKERRQ(ierr);
4850   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4851   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4852   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4853   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4854   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4855   ierr = PetscFree(status);CHKERRQ(ierr);
4856 
4857   /* compute a local seq matrix in each processor */
4858   /*----------------------------------------------*/
4859   /* allocate bi array and free space for accumulating nonzero column info */
4860   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4861   bi[0] = 0;
4862 
4863   /* create and initialize a linked list */
4864   nlnk = N+1;
4865   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4866 
4867   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4868   len  = ai[owners[rank+1]] - ai[owners[rank]];
4869   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4870 
4871   current_space = free_space;
4872 
4873   /* determine symbolic info for each local row */
4874   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4875 
4876   for (k=0; k<merge->nrecv; k++) {
4877     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4878     nrows       = *buf_ri_k[k];
4879     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4880     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4881   }
4882 
4883   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4884   len  = 0;
4885   for (i=0; i<m; i++) {
4886     bnzi = 0;
4887     /* add local non-zero cols of this proc's seqmat into lnk */
4888     arow  = owners[rank] + i;
4889     anzi  = ai[arow+1] - ai[arow];
4890     aj    = a->j + ai[arow];
4891     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4892     bnzi += nlnk;
4893     /* add received col data into lnk */
4894     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4895       if (i == *nextrow[k]) { /* i-th row */
4896         anzi  = *(nextai[k]+1) - *nextai[k];
4897         aj    = buf_rj[k] + *nextai[k];
4898         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4899         bnzi += nlnk;
4900         nextrow[k]++; nextai[k]++;
4901       }
4902     }
4903     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4904 
4905     /* if free space is not available, make more free space */
4906     if (current_space->local_remaining<bnzi) {
4907       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4908       nspacedouble++;
4909     }
4910     /* copy data into free space, then initialize lnk */
4911     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4912     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4913 
4914     current_space->array           += bnzi;
4915     current_space->local_used      += bnzi;
4916     current_space->local_remaining -= bnzi;
4917 
4918     bi[i+1] = bi[i] + bnzi;
4919   }
4920 
4921   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4922 
4923   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4924   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4925   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4926 
4927   /* create symbolic parallel matrix B_mpi */
4928   /*---------------------------------------*/
4929   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4930   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4931   if (n==PETSC_DECIDE) {
4932     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4933   } else {
4934     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4935   }
4936   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4937   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4938   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4939   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4940   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4941 
4942   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4943   B_mpi->assembled    = PETSC_FALSE;
4944   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4945   merge->bi           = bi;
4946   merge->bj           = bj;
4947   merge->buf_ri       = buf_ri;
4948   merge->buf_rj       = buf_rj;
4949   merge->coi          = NULL;
4950   merge->coj          = NULL;
4951   merge->owners_co    = NULL;
4952 
4953   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4954 
4955   /* attach the supporting struct to B_mpi for reuse */
4956   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4957   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4958   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4959   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4960   *mpimat = B_mpi;
4961 
4962   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4963   PetscFunctionReturn(0);
4964 }
4965 
4966 #undef __FUNCT__
4967 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4968 /*@C
4969       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4970                  matrices from each processor
4971 
4972     Collective on MPI_Comm
4973 
4974    Input Parameters:
4975 +    comm - the communicator the parallel matrix will live on
4976 .    seqmat - the input sequential matrix held on each processor
4977 .    m - number of local rows (or PETSC_DECIDE)
4978 .    n - number of local columns (or PETSC_DECIDE)
4979 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4980 
4981    Output Parameter:
4982 .    mpimat - the parallel matrix generated
4983 
4984     Level: advanced
4985 
4986    Notes:
4987      The dimensions of the sequential matrix in each processor MUST be the same.
4988      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4989      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
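
   A minimal sketch (not from a PETSc example; Aseq is an assumed SeqAIJ matrix of the
   same global dimensions on every process, and ierr is assumed to be declared):

.vb
     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve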
4990 @*/
4991 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4992 {
4993   PetscErrorCode ierr;
4994   PetscMPIInt    size;
4995 
4996   PetscFunctionBegin;
4997   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4998   if (size == 1) {
4999     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5000     if (scall == MAT_INITIAL_MATRIX) {
5001       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5002     } else {
5003       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5004     }
5005     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5006     PetscFunctionReturn(0);
5007   }
5008   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5009   if (scall == MAT_INITIAL_MATRIX) {
5010     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5011   }
5012   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5013   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5014   PetscFunctionReturn(0);
5015 }
5016 
5017 #undef __FUNCT__
5018 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5019 /*@
5020      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5021           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5022           with MatGetSize()
5023 
5024     Not Collective
5025 
5026    Input Parameters:
5027 +    A - the matrix
5028 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5029 
5030    Output Parameter:
5031 .    A_loc - the local sequential matrix generated
5032 
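   A minimal sketch (not from a PETSc example; A is the MPIAIJ matrix and ierr is
   assumed to be declared; with MAT_INITIAL_MATRIX the caller destroys A_loc when done):

.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc as a sequential matrix ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
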
5033     Level: developer
5034 
5035 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5036 
5037 @*/
5038 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5039 {
5040   PetscErrorCode ierr;
5041   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5042   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5043   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5044   MatScalar      *aa=a->a,*ba=b->a,*cam;
5045   PetscScalar    *ca;
5046   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5047   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5048   PetscBool      match;
5049 
5050   PetscFunctionBegin;
5051   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5052   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5053   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5054   if (scall == MAT_INITIAL_MATRIX) {
5055     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5056     ci[0] = 0;
5057     for (i=0; i<am; i++) {
5058       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5059     }
5060     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5061     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5062     k    = 0;
5063     for (i=0; i<am; i++) {
5064       ncols_o = bi[i+1] - bi[i];
5065       ncols_d = ai[i+1] - ai[i];
5066       /* off-diagonal portion of A */
5067       for (jo=0; jo<ncols_o; jo++) {
5068         col = cmap[*bj];
5069         if (col >= cstart) break;
5070         cj[k]   = col; bj++;
5071         ca[k++] = *ba++;
5072       }
5073       /* diagonal portion of A */
5074       for (j=0; j<ncols_d; j++) {
5075         cj[k]   = cstart + *aj++;
5076         ca[k++] = *aa++;
5077       }
5078       /* off-diagonal portion of A */
5079       for (j=jo; j<ncols_o; j++) {
5080         cj[k]   = cmap[*bj++];
5081         ca[k++] = *ba++;
5082       }
5083     }
5084     /* put together the new matrix */
5085     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5086     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5087     /* Since these are PETSc arrays, change flags to free them as necessary. */
5088     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5089     mat->free_a  = PETSC_TRUE;
5090     mat->free_ij = PETSC_TRUE;
5091     mat->nonew   = 0;
5092   } else if (scall == MAT_REUSE_MATRIX) {
5093     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5094     ci = mat->i; cj = mat->j; cam = mat->a;
5095     for (i=0; i<am; i++) {
5096       /* off-diagonal portion of A */
5097       ncols_o = bi[i+1] - bi[i];
5098       for (jo=0; jo<ncols_o; jo++) {
5099         col = cmap[*bj];
5100         if (col >= cstart) break;
5101         *cam++ = *ba++; bj++;
5102       }
5103       /* diagonal portion of A */
5104       ncols_d = ai[i+1] - ai[i];
5105       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5106       /* off-diagonal portion of A: global columns right of the diagonal block */
5107       for (j=jo; j<ncols_o; j++) {
5108         *cam++ = *ba++; bj++;
5109       }
5110     }
5111   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5112   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5113   PetscFunctionReturn(0);
5114 }
5115 
5116 #undef __FUNCT__
5117 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5118 /*@C
5119      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5120 
5121     Not Collective
5122 
5123    Input Parameters:
5124 +    A - the matrix
5125 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5126 -    row, col - index sets of rows and columns to extract (or NULL)
5127 
5128    Output Parameter:
5129 .    A_loc - the local sequential matrix generated
5130 
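   Example usage (a minimal sketch; A is assumed to be an assembled MPIAIJ matrix, and the row
   and col index sets are left NULL so that all local rows and all nonzero columns are taken):
.vb
   Mat A_loc;
   ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
   ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
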
5131     Level: developer
5132 
5133 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5134 
5135 @*/
5136 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5137 {
5138   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5139   PetscErrorCode ierr;
5140   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5141   IS             isrowa,iscola;
5142   Mat            *aloc;
5143   PetscBool      match;
5144 
5145   PetscFunctionBegin;
5146   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5147   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5148   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5149   if (!row) {
5150     start = A->rmap->rstart; end = A->rmap->rend;
5151     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5152   } else {
5153     isrowa = *row;
5154   }
5155   if (!col) {
5156     start = A->cmap->rstart;
5157     cmap  = a->garray;
5158     nzA   = a->A->cmap->n;
5159     nzB   = a->B->cmap->n;
5160     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5161     ncols = 0;
5162     for (i=0; i<nzB; i++) {
5163       if (cmap[i] < start) idx[ncols++] = cmap[i];
5164       else break;
5165     }
5166     imark = i;
5167     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5168     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5169     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5170   } else {
5171     iscola = *col;
5172   }
5173   if (scall != MAT_INITIAL_MATRIX) {
5174     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5175     aloc[0] = *A_loc;
5176   }
5177   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5178   *A_loc = aloc[0];
5179   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5180   if (!row) {
5181     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5182   }
5183   if (!col) {
5184     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5185   }
5186   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5187   PetscFunctionReturn(0);
5188 }
5189 
5190 #undef __FUNCT__
5191 #define __FUNCT__ "MatGetBrowsOfAcols"
5192 /*@C
5193     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5194 
5195     Collective on Mat
5196 
5197    Input Parameters:
5198 +    A,B - the matrices in mpiaij format
5199 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5200 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5201 
5202    Output Parameter:
5203 +    rowb, colb - index sets of rows and columns of B to extract
5204 -    B_seq - the sequential matrix generated
5205 
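   Example usage (a minimal sketch; A and B are assumed to be assembled MPIAIJ matrices with
   A's column layout matching B's row layout; keeping the returned index sets allows the call
   to be repeated with MAT_REUSE_MATRIX):
.vb
   Mat B_seq;
   IS  rowb = NULL,colb = NULL;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   ierr = ISDestroy(&rowb);CHKERRQ(ierr);
   ierr = ISDestroy(&colb);CHKERRQ(ierr);
   ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
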
5206     Level: developer
5207 
5208 @*/
5209 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5210 {
5211   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5212   PetscErrorCode ierr;
5213   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5214   IS             isrowb,iscolb;
5215   Mat            *bseq=NULL;
5216 
5217   PetscFunctionBegin;
5218   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5219     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5220   }
5221   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5222 
5223   if (scall == MAT_INITIAL_MATRIX) {
5224     start = A->cmap->rstart;
5225     cmap  = a->garray;
5226     nzA   = a->A->cmap->n;
5227     nzB   = a->B->cmap->n;
5228     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5229     ncols = 0;
5230     for (i=0; i<nzB; i++) {  /* row < local row index */
5231       if (cmap[i] < start) idx[ncols++] = cmap[i];
5232       else break;
5233     }
5234     imark = i;
5235     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5236     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5237     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5238     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5239   } else {
5240     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5241     isrowb  = *rowb; iscolb = *colb;
5242     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5243     bseq[0] = *B_seq;
5244   }
5245   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5246   *B_seq = bseq[0];
5247   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5248   if (!rowb) {
5249     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5250   } else {
5251     *rowb = isrowb;
5252   }
5253   if (!colb) {
5254     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5255   } else {
5256     *colb = iscolb;
5257   }
5258   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5259   PetscFunctionReturn(0);
5260 }
5261 
5262 #undef __FUNCT__
5263 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5264 /*
5265     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5266     of the OFF-DIAGONAL portion of the local part of A
5267 
5268     Collective on Mat
5269 
5270    Input Parameters:
5271 +    A,B - the matrices in mpiaij format
5272 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5273 
5274    Output Parameter:
5275 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5276 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5277 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5278 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5279 
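   Example usage (a minimal sketch; A and B are assumed to be assembled MPIAIJ matrices with
   A's column layout matching B's row layout; the startsj and bufa arrays are kept so the call
   can be repeated with MAT_REUSE_MATRIX after the numerical values of B change):

     Mat       B_oth;
     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
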
5280     Level: developer
5281 
5282 */
5283 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5284 {
5285   VecScatter_MPI_General *gen_to,*gen_from;
5286   PetscErrorCode         ierr;
5287   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5288   Mat_SeqAIJ             *b_oth;
5289   VecScatter             ctx =a->Mvctx;
5290   MPI_Comm               comm;
5291   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5292   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5293   PetscScalar            *rvalues,*svalues;
5294   MatScalar              *b_otha,*bufa,*bufA;
5295   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5296   MPI_Request            *rwaits = NULL,*swaits = NULL;
5297   MPI_Status             *sstatus,rstatus;
5298   PetscMPIInt            jj;
5299   PetscInt               *cols,sbs,rbs;
5300   PetscScalar            *vals;
5301 
5302   PetscFunctionBegin;
5303   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5304   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5305     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5306   }
5307   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5308   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5309 
5310   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5311   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5312   rvalues  = gen_from->values; /* scatter receive buffer, reused below to hold incoming row lengths */
5313   svalues  = gen_to->values;   /* scatter send buffer, reused below to hold outgoing row lengths */
5314   nrecvs   = gen_from->n;
5315   nsends   = gen_to->n;
5316 
5317   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5318   srow    = gen_to->indices;    /* local row index to be sent */
5319   sstarts = gen_to->starts;
5320   sprocs  = gen_to->procs;
5321   sstatus = gen_to->sstatus;
5322   sbs     = gen_to->bs;
5323   rstarts = gen_from->starts;
5324   rprocs  = gen_from->procs;
5325   rbs     = gen_from->bs;
5326 
5327   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5328   if (scall == MAT_INITIAL_MATRIX) {
5329     /* i-array */
5330     /*---------*/
5331     /*  post receives */
5332     for (i=0; i<nrecvs; i++) {
5333       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5334       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5335       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5336     }
5337 
5338     /* pack the outgoing message */
5339     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5340 
5341     sstartsj[0] = 0;
5342     rstartsj[0] = 0;
5343     len         = 0; /* total length of j or a array to be sent */
5344     k           = 0;
5345     for (i=0; i<nsends; i++) {
5346       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5347       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5348       for (j=0; j<nrows; j++) {
5349         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5350         for (l=0; l<sbs; l++) {
5351           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5352 
5353           rowlen[j*sbs+l] = ncols;
5354 
5355           len += ncols;
5356           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5357         }
5358         k++;
5359       }
5360       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5361 
5362       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5363     }
5364     /* recvs and sends of i-array are completed */
5365     i = nrecvs;
5366     while (i--) {
5367       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5368     }
5369     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5370 
5371     /* allocate buffers for sending j and a arrays */
5372     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5373     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5374 
5375     /* create i-array of B_oth */
5376     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5377 
5378     b_othi[0] = 0;
5379     len       = 0; /* total length of j or a array to be received */
5380     k         = 0;
5381     for (i=0; i<nrecvs; i++) {
5382       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5383       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5384       for (j=0; j<nrows; j++) {
5385         b_othi[k+1] = b_othi[k] + rowlen[j];
5386         len        += rowlen[j]; k++;
5387       }
5388       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5389     }
5390 
5391     /* allocate space for j and a arrays of B_oth */
5392     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5393     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5394 
5395     /* j-array */
5396     /*---------*/
5397     /*  post receives of j-array */
5398     for (i=0; i<nrecvs; i++) {
5399       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5400       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5401     }
5402 
5403     /* pack the outgoing message j-array */
5404     k = 0;
5405     for (i=0; i<nsends; i++) {
5406       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5407       bufJ  = bufj+sstartsj[i];
5408       for (j=0; j<nrows; j++) {
5409         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5410         for (ll=0; ll<sbs; ll++) {
5411           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5412           for (l=0; l<ncols; l++) {
5413             *bufJ++ = cols[l];
5414           }
5415           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5416         }
5417       }
5418       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5419     }
5420 
5421     /* recvs and sends of j-array are completed */
5422     i = nrecvs;
5423     while (i--) {
5424       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5425     }
5426     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5427   } else if (scall == MAT_REUSE_MATRIX) {
5428     sstartsj = *startsj_s;
5429     rstartsj = *startsj_r;
5430     bufa     = *bufa_ptr;
5431     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5432     b_otha   = b_oth->a;
5433   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5434 
5435   /* a-array */
5436   /*---------*/
5437   /*  post receives of a-array */
5438   for (i=0; i<nrecvs; i++) {
5439     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5440     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5441   }
5442 
5443   /* pack the outgoing message a-array */
5444   k = 0;
5445   for (i=0; i<nsends; i++) {
5446     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5447     bufA  = bufa+sstartsj[i];
5448     for (j=0; j<nrows; j++) {
5449       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5450       for (ll=0; ll<sbs; ll++) {
5451         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5452         for (l=0; l<ncols; l++) {
5453           *bufA++ = vals[l];
5454         }
5455         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5456       }
5457     }
5458     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5459   }
5460   /* recvs and sends of a-array are completed */
5461   i = nrecvs;
5462   while (i--) {
5463     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5464   }
5465   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5466   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5467 
5468   if (scall == MAT_INITIAL_MATRIX) {
5469     /* put together the new matrix */
5470     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5471 
5472     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5473     /* Since these are PETSc arrays, change flags to free them as necessary. */
5474     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5475     b_oth->free_a  = PETSC_TRUE;
5476     b_oth->free_ij = PETSC_TRUE;
5477     b_oth->nonew   = 0;
5478 
5479     ierr = PetscFree(bufj);CHKERRQ(ierr);
5480     if (!startsj_s || !bufa_ptr) {
5481       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5482       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5483     } else {
5484       *startsj_s = sstartsj;
5485       *startsj_r = rstartsj;
5486       *bufa_ptr  = bufa;
5487     }
5488   }
5489   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5490   PetscFunctionReturn(0);
5491 }
5492 
5493 #undef __FUNCT__
5494 #define __FUNCT__ "MatGetCommunicationStructs"
5495 /*@C
5496   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5497 
5498   Not Collective
5499 
5500   Input Parameters:
5501 . A - The matrix in mpiaij format
5502 
5503   Output Parameter:
5504 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5505 . colmap - A map from global column index to local index into lvec
5506 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5507 
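  Example usage (a minimal sketch; A is assumed to be an assembled MPIAIJ matrix; the returned
  objects are owned by the matrix and must not be destroyed by the caller):
.vb
  Vec        lvec;
  VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
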
5508   Level: developer
5509 
5510 @*/
5511 #if defined(PETSC_USE_CTABLE)
5512 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5513 #else
5514 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5515 #endif
5516 {
5517   Mat_MPIAIJ *a;
5518 
5519   PetscFunctionBegin;
5520   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5521   PetscValidPointer(lvec, 2);
5522   PetscValidPointer(colmap, 3);
5523   PetscValidPointer(multScatter, 4);
5524   a = (Mat_MPIAIJ*) A->data;
5525   if (lvec) *lvec = a->lvec;
5526   if (colmap) *colmap = a->colmap;
5527   if (multScatter) *multScatter = a->Mvctx;
5528   PetscFunctionReturn(0);
5529 }
5530 
5531 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5532 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5533 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5534 
5535 #undef __FUNCT__
5536 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5537 /*
5538     Computes C = A*B as (B'*A')', since forming the MPIDense*MPIAIJ product A*B directly is untenable
5539 
5540                n                       p                          p
5541         (              )       (              )         (                  )
5542       m (      A       )  *  n (       B      )   =   m (         C        )
5543         (              )       (              )         (                  )
5544 
5545 */
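/*
   A caller typically reaches this routine through the public MatMatMult() interface, e.g.
   (a minimal sketch; A is assumed to be an assembled MPIDENSE matrix and B an assembled
   MPIAIJ matrix with A's column layout matching B's row layout):

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/
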
5546 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5547 {
5548   PetscErrorCode ierr;
5549   Mat            At,Bt,Ct;
5550 
5551   PetscFunctionBegin;
5552   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5553   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5554   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5555   ierr = MatDestroy(&At);CHKERRQ(ierr);
5556   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5557   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5558   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5559   PetscFunctionReturn(0);
5560 }
5561 
5562 #undef __FUNCT__
5563 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5564 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5565 {
5566   PetscErrorCode ierr;
5567   PetscInt       m=A->rmap->n,n=B->cmap->n;
5568   Mat            Cmat;
5569 
5570   PetscFunctionBegin;
5571   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5572   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5573   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5574   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5575   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5576   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5577   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5578   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5579 
5580   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5581 
5582   *C = Cmat;
5583   PetscFunctionReturn(0);
5584 }
5585 
5586 /* ----------------------------------------------------------------*/
5587 #undef __FUNCT__
5588 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5589 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5590 {
5591   PetscErrorCode ierr;
5592 
5593   PetscFunctionBegin;
5594   if (scall == MAT_INITIAL_MATRIX) {
5595     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5596     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5597     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5598   }
5599   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5600   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5601   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5602   PetscFunctionReturn(0);
5603 }
5604 
5605 #if defined(PETSC_HAVE_MUMPS)
5606 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5607 #endif
5608 #if defined(PETSC_HAVE_PASTIX)
5609 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5610 #endif
5611 #if defined(PETSC_HAVE_SUPERLU_DIST)
5612 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5613 #endif
5614 #if defined(PETSC_HAVE_CLIQUE)
5615 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5616 #endif
5617 
5618 /*MC
5619    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5620 
5621    Options Database Keys:
5622 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5623 
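   Example usage (a minimal sketch; comm, m, n, and the preallocation counts are placeholder
   assumptions chosen only for illustration):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
.ve
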
5624   Level: beginner
5625 
5626 .seealso: MatCreateAIJ()
5627 M*/
5628 
5629 #undef __FUNCT__
5630 #define __FUNCT__ "MatCreate_MPIAIJ"
5631 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5632 {
5633   Mat_MPIAIJ     *b;
5634   PetscErrorCode ierr;
5635   PetscMPIInt    size;
5636 
5637   PetscFunctionBegin;
5638   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5639 
5640   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5641   B->data       = (void*)b;
5642   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5643   B->assembled  = PETSC_FALSE;
5644   B->insertmode = NOT_SET_VALUES;
5645   b->size       = size;
5646 
5647   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5648 
5649   /* build cache for off array entries formed */
5650   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5651 
5652   b->donotstash  = PETSC_FALSE;
5653   b->colmap      = 0;
5654   b->garray      = 0;
5655   b->roworiented = PETSC_TRUE;
5656 
5657   /* stuff used for matrix vector multiply */
5658   b->lvec  = NULL;
5659   b->Mvctx = NULL;
5660 
5661   /* stuff for MatGetRow() */
5662   b->rowindices   = 0;
5663   b->rowvalues    = 0;
5664   b->getrowactive = PETSC_FALSE;
5665 
5666   /* flexible pointer used in CUSP/CUSPARSE classes */
5667   b->spptr = NULL;
5668 
5669 #if defined(PETSC_HAVE_MUMPS)
5670   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5671 #endif
5672 #if defined(PETSC_HAVE_PASTIX)
5673   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5674 #endif
5675 #if defined(PETSC_HAVE_SUPERLU_DIST)
5676   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5677 #endif
5678 #if defined(PETSC_HAVE_CLIQUE)
5679   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5680 #endif
5681   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5682   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5683   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5684   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5685   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5686   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5687   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5688   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5689   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5690   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5691   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5692   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5693   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5694   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5695   PetscFunctionReturn(0);
5696 }
5697 
5698 #undef __FUNCT__
5699 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5700 /*@
5701      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5702          and "off-diagonal" part of the matrix in CSR format.
5703 
5704    Collective on MPI_Comm
5705 
5706    Input Parameters:
5707 +  comm - MPI communicator
5708 .  m - number of local rows (Cannot be PETSC_DECIDE)
5709 .  n - This value should be the same as the local size used in creating the
5710        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5711        it calculated if N is given). For square matrices n is almost always m.
5712 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5713 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5714 .   i - row indices for "diagonal" portion of matrix
5715 .   j - column indices
5716 .   a - matrix values
5717 .   oi - row indices for "off-diagonal" portion of matrix
5718 .   oj - column indices
5719 -   oa - matrix values
5720 
5721    Output Parameter:
5722 .   mat - the matrix
5723 
5724    Level: advanced
5725 
5726    Notes:
5727        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5728        must free the arrays once the matrix has been destroyed and not before.
5729 
5730        The i and j indices are 0 based
5731 
5732        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5733 
5734        This sets local rows and cannot be used to set off-processor values.
5735 
5736        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5737        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5738        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5739        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5740        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5741        communication if it is known that only local entries will be set.
5742 
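   Example usage (a minimal sketch of what rank 0 might pass in a two-process run assembling a
   4x4 matrix in which each rank owns 2 rows and 2 columns; all index and value arrays below are
   assumptions chosen only to illustrate the split CSR layout, and they must remain valid until
   the matrix is destroyed, as explained in the Notes above):
.vb
   PetscInt    i[]  = {0,1,2}, j[]  = {0,1};    /* "diagonal" block, local column indices      */
   PetscScalar a[]  = {1.0,2.0};
   PetscInt    oi[] = {0,1,2}, oj[] = {2,3};    /* "off-diagonal" block, global column indices */
   PetscScalar oa[] = {3.0,4.0};
   Mat         A;
   ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
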
5743 .keywords: matrix, aij, compressed row, sparse, parallel
5744 
5745 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5746           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5747 @*/
5748 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5749 {
5750   PetscErrorCode ierr;
5751   Mat_MPIAIJ     *maij;
5752 
5753   PetscFunctionBegin;
5754   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5755   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5756   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5757   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5758   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5759   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5760   maij = (Mat_MPIAIJ*) (*mat)->data;
5761 
5762   (*mat)->preallocated = PETSC_TRUE;
5763 
5764   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5765   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5766 
5767   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5768   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5769 
5770   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5771   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5772   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5773   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5774 
5775   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5776   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5777   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5778   PetscFunctionReturn(0);
5779 }
5780 
5781 /*
5782     Special version for direct calls from Fortran
5783 */
5784 #include <petsc-private/fortranimpl.h>
5785 
5786 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5787 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5788 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5789 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5790 #endif
5791 
5792 /* Change these macros so can be used in void function */
5793 #undef CHKERRQ
5794 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5795 #undef SETERRQ2
5796 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5797 #undef SETERRQ3
5798 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5799 #undef SETERRQ
5800 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5801 
5802 #undef __FUNCT__
5803 #define __FUNCT__ "matsetvaluesmpiaij_"
5804 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5805 {
5806   Mat            mat  = *mmat;
5807   PetscInt       m    = *mm, n = *mn;
5808   InsertMode     addv = *maddv;
5809   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5810   PetscScalar    value;
5811   PetscErrorCode ierr;
5812 
5813   MatCheckPreallocated(mat,1);
5814   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5815 
5816 #if defined(PETSC_USE_DEBUG)
5817   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5818 #endif
5819   {
5820     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5821     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5822     PetscBool roworiented = aij->roworiented;
5823 
5824     /* Some Variables required in the macro */
5825     Mat        A                 = aij->A;
5826     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5827     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5828     MatScalar  *aa               = a->a;
5829     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5830     Mat        B                 = aij->B;
5831     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5832     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5833     MatScalar  *ba               = b->a;
5834 
5835     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5836     PetscInt  nonew = a->nonew;
5837     MatScalar *ap1,*ap2;
5838 
5839     PetscFunctionBegin;
5840     for (i=0; i<m; i++) {
5841       if (im[i] < 0) continue;
5842 #if defined(PETSC_USE_DEBUG)
5843       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5844 #endif
5845       if (im[i] >= rstart && im[i] < rend) {
5846         row      = im[i] - rstart;
5847         lastcol1 = -1;
5848         rp1      = aj + ai[row];
5849         ap1      = aa + ai[row];
5850         rmax1    = aimax[row];
5851         nrow1    = ailen[row];
5852         low1     = 0;
5853         high1    = nrow1;
5854         lastcol2 = -1;
5855         rp2      = bj + bi[row];
5856         ap2      = ba + bi[row];
5857         rmax2    = bimax[row];
5858         nrow2    = bilen[row];
5859         low2     = 0;
5860         high2    = nrow2;
5861 
5862         for (j=0; j<n; j++) {
5863           if (roworiented) value = v[i*n+j];
5864           else value = v[i+j*m];
5865           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5866           if (in[j] >= cstart && in[j] < cend) {
5867             col = in[j] - cstart;
5868             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5869           } else if (in[j] < 0) continue;
5870 #if defined(PETSC_USE_DEBUG)
5871           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5872 #endif
5873           else {
5874             if (mat->was_assembled) {
5875               if (!aij->colmap) {
5876                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5877               }
5878 #if defined(PETSC_USE_CTABLE)
5879               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5880               col--;
5881 #else
5882               col = aij->colmap[in[j]] - 1;
5883 #endif
5884               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5885                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5886                 col  =  in[j];
5887                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5888                 B     = aij->B;
5889                 b     = (Mat_SeqAIJ*)B->data;
5890                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5891                 rp2   = bj + bi[row];
5892                 ap2   = ba + bi[row];
5893                 rmax2 = bimax[row];
5894                 nrow2 = bilen[row];
5895                 low2  = 0;
5896                 high2 = nrow2;
5897                 bm    = aij->B->rmap->n;
5898                 ba    = b->a;
5899               }
5900             } else col = in[j];
5901             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5902           }
5903         }
5904       } else if (!aij->donotstash) {
5905         if (roworiented) {
5906           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5907         } else {
5908           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5909         }
5910       }
5911     }
5912   }
5913   PetscFunctionReturnVoid();
5914 }
5915 
5916