xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 2205254efee3a00a594e5e2a3a70f74dcb40bc03)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia        = a->i;
58   ib        = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75 ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt  = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr   = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 
178     Only for square matrices
179 */
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     if (!rank) {
204       bses[0] = gmat->rmap->bs;
205       bses[1] = gmat->cmap->bs;
206     }
207     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
208     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
209     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
210     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
211     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
212     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
213 
214     rowners[0] = 0;
215     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
216     rstart = rowners[rank];
217     rend   = rowners[rank+1];
218     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
219     if (!rank) {
220       gmata = (Mat_SeqAIJ*) gmat->data;
221       /* send row lengths to all processors */
222       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
223       for (i=1; i<size; i++) {
224         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
225       }
226       /* determine number diagonal and off-diagonal counts */
227       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
228       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
229       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
230       jj   = 0;
231       for (i=0; i<m; i++) {
232         for (j=0; j<dlens[i]; j++) {
233           if (gmata->j[jj] < rstart) ld[i]++;
234           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
235           jj++;
236         }
237       }
238       /* send column indices to other processes */
239       for (i=1; i<size; i++) {
240         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
241         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
242         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243       }
244 
245       /* send numerical values to other processes */
246       for (i=1; i<size; i++) {
247         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
248         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
249       }
250       gmataa = gmata->a;
251       gmataj = gmata->j;
252 
253     } else {
254       /* receive row lengths */
255       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       /* receive column indices */
257       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
258       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
259       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
260       /* determine number diagonal and off-diagonal counts */
261       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
262       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
263       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
264       jj   = 0;
265       for (i=0; i<m; i++) {
266         for (j=0; j<dlens[i]; j++) {
267           if (gmataj[jj] < rstart) ld[i]++;
268           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
269           jj++;
270         }
271       }
272       /* receive numerical values */
273       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
274       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
275     }
276     /* set preallocation */
277     for (i=0; i<m; i++) {
278       dlens[i] -= olens[i];
279     }
280     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
281     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
282 
283     for (i=0; i<m; i++) {
284       dlens[i] += olens[i];
285     }
286     cnt = 0;
287     for (i=0; i<m; i++) {
288       row  = rstart + i;
289       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
290       cnt += dlens[i];
291     }
292     if (rank) {
293       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
294     }
295     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
296     ierr = PetscFree(rowners);CHKERRQ(ierr);
297 
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299 
300     *inmat = mat;
301   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
302     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
303     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
304     mat  = *inmat;
305     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
306     if (!rank) {
307       /* send numerical values to other processes */
308       gmata  = (Mat_SeqAIJ*) gmat->data;
309       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
310       gmataa = gmata->a;
311       for (i=1; i<size; i++) {
312         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
313         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
314       }
315       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
316     } else {
317       /* receive numerical values from process 0*/
318       nz   = Ad->nz + Ao->nz;
319       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
320       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
321     }
322     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
323     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
324     ad = Ad->a;
325     ao = Ao->a;
326     if (mat->rmap->n) {
327       i  = 0;
328       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
329       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
330     }
331     for (i=1; i<mat->rmap->n; i++) {
332       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
333       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
334     }
335     i--;
336     if (mat->rmap->n) {
337       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
338     }
339     if (rank) {
340       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
341     }
342   }
343   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
345   CHKMEMQ;
346   PetscFunctionReturn(0);
347 }
348 
349 /*
350   Local utility routine that creates a mapping from the global column
351 number to the local number in the off-diagonal part of the local
352 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
353 a slightly higher hash table cost; without it it is not scalable (each processor
354 has an order N integer array but is fast to acess.
355 */
356 #undef __FUNCT__
357 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
358 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
359 {
360   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
361   PetscErrorCode ierr;
362   PetscInt       n = aij->B->cmap->n,i;
363 
364   PetscFunctionBegin;
365   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
366 #if defined(PETSC_USE_CTABLE)
367   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
368   for (i=0; i<n; i++) {
369     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
370   }
371 #else
372   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
373   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
374   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
375   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
376 #endif
377   PetscFunctionReturn(0);
378 }
379 
380 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
381 { \
382     if (col <= lastcol1)  low1 = 0;     \
383     else                 high1 = nrow1; \
384     lastcol1 = col;\
385     while (high1-low1 > 5) { \
386       t = (low1+high1)/2; \
387       if (rp1[t] > col) high1 = t; \
388       else              low1  = t; \
389     } \
390       for (_i=low1; _i<high1; _i++) { \
391         if (rp1[_i] > col) break; \
392         if (rp1[_i] == col) { \
393           if (addv == ADD_VALUES) ap1[_i] += value;   \
394           else                    ap1[_i] = value; \
395           goto a_noinsert; \
396         } \
397       }  \
398       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
399       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
400       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
401       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
402       N = nrow1++ - 1; a->nz++; high1++; \
403       /* shift up all the later entries in this row */ \
404       for (ii=N; ii>=_i; ii--) { \
405         rp1[ii+1] = rp1[ii]; \
406         ap1[ii+1] = ap1[ii]; \
407       } \
408       rp1[_i] = col;  \
409       ap1[_i] = value;  \
410       a_noinsert: ; \
411       ailen[row] = nrow1; \
412 }
413 
414 
415 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
416   { \
417     if (col <= lastcol2) low2 = 0;                        \
418     else high2 = nrow2;                                   \
419     lastcol2 = col;                                       \
420     while (high2-low2 > 5) {                              \
421       t = (low2+high2)/2;                                 \
422       if (rp2[t] > col) high2 = t;                        \
423       else             low2  = t;                         \
424     }                                                     \
425     for (_i=low2; _i<high2; _i++) {                       \
426       if (rp2[_i] > col) break;                           \
427       if (rp2[_i] == col) {                               \
428         if (addv == ADD_VALUES) ap2[_i] += value;         \
429         else                    ap2[_i] = value;          \
430         goto b_noinsert;                                  \
431       }                                                   \
432     }                                                     \
433     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
434     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
435     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
436     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
437     N = nrow2++ - 1; b->nz++; high2++;                    \
438     /* shift up all the later entries in this row */      \
439     for (ii=N; ii>=_i; ii--) {                            \
440       rp2[ii+1] = rp2[ii];                                \
441       ap2[ii+1] = ap2[ii];                                \
442     }                                                     \
443     rp2[_i] = col;                                        \
444     ap2[_i] = value;                                      \
445     b_noinsert: ;                                         \
446     bilen[row] = nrow2;                                   \
447   }
448 
449 #undef __FUNCT__
450 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
451 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
452 {
453   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
454   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
455   PetscErrorCode ierr;
456   PetscInt       l,*garray = mat->garray,diag;
457 
458   PetscFunctionBegin;
459   /* code only works for square matrices A */
460 
461   /* find size of row to the left of the diagonal part */
462   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
463   row  = row - diag;
464   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
465     if (garray[b->j[b->i[row]+l]] > diag) break;
466   }
467   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* diagonal part */
470   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
471 
472   /* right of diagonal part */
473   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
474   PetscFunctionReturn(0);
475 }
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatSetValues_MPIAIJ"
479 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
480 {
481   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
482   PetscScalar    value;
483   PetscErrorCode ierr;
484   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
485   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
486   PetscBool      roworiented = aij->roworiented;
487 
488   /* Some Variables required in the macro */
489   Mat        A                 = aij->A;
490   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
491   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
492   MatScalar  *aa               = a->a;
493   PetscBool  ignorezeroentries = a->ignorezeroentries;
494   Mat        B                 = aij->B;
495   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
496   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
497   MatScalar  *ba               = b->a;
498 
499   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
500   PetscInt  nonew;
501   MatScalar *ap1,*ap2;
502 
503   PetscFunctionBegin;
504   if (v) PetscValidScalarPointer(v,6);
505   for (i=0; i<m; i++) {
506     if (im[i] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
509 #endif
510     if (im[i] >= rstart && im[i] < rend) {
511       row      = im[i] - rstart;
512       lastcol1 = -1;
513       rp1      = aj + ai[row];
514       ap1      = aa + ai[row];
515       rmax1    = aimax[row];
516       nrow1    = ailen[row];
517       low1     = 0;
518       high1    = nrow1;
519       lastcol2 = -1;
520       rp2      = bj + bi[row];
521       ap2      = ba + bi[row];
522       rmax2    = bimax[row];
523       nrow2    = bilen[row];
524       low2     = 0;
525       high2    = nrow2;
526 
527       for (j=0; j<n; j++) {
528         if (v) {
529           if (roworiented) value = v[i*n+j];
530           else             value = v[i+j*m];
531         } else value = 0.0;
532         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
533         if (in[j] >= cstart && in[j] < cend) {
534           col   = in[j] - cstart;
535           nonew = a->nonew;
536           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
537         } else if (in[j] < 0) continue;
538 #if defined(PETSC_USE_DEBUG)
539         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
540 #endif
541         else {
542           if (mat->was_assembled) {
543             if (!aij->colmap) {
544               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
545             }
546 #if defined(PETSC_USE_CTABLE)
547             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
548             col--;
549 #else
550             col = aij->colmap[in[j]] - 1;
551 #endif
552             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
553               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
554               col  =  in[j];
555               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
556               B     = aij->B;
557               b     = (Mat_SeqAIJ*)B->data;
558               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
559               rp2   = bj + bi[row];
560               ap2   = ba + bi[row];
561               rmax2 = bimax[row];
562               nrow2 = bilen[row];
563               low2  = 0;
564               high2 = nrow2;
565               bm    = aij->B->rmap->n;
566               ba    = b->a;
567             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
568           } else col = in[j];
569           nonew = b->nonew;
570           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
571         }
572       }
573     } else {
574       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
575       if (!aij->donotstash) {
576         mat->assembled = PETSC_FALSE;
577         if (roworiented) {
578           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
579         } else {
580           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         }
582       }
583     }
584   }
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNCT__
589 #define __FUNCT__ "MatGetValues_MPIAIJ"
590 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
591 {
592   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
593   PetscErrorCode ierr;
594   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
595   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
596 
597   PetscFunctionBegin;
598   for (i=0; i<m; i++) {
599     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
600     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
601     if (idxm[i] >= rstart && idxm[i] < rend) {
602       row = idxm[i] - rstart;
603       for (j=0; j<n; j++) {
604         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
605         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
606         if (idxn[j] >= cstart && idxn[j] < cend) {
607           col  = idxn[j] - cstart;
608           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
609         } else {
610           if (!aij->colmap) {
611             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
612           }
613 #if defined(PETSC_USE_CTABLE)
614           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
615           col--;
616 #else
617           col = aij->colmap[idxn[j]] - 1;
618 #endif
619           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
620           else {
621             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
622           }
623         }
624       }
625     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
626   }
627   PetscFunctionReturn(0);
628 }
629 
630 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
631 
632 #undef __FUNCT__
633 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
634 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
635 {
636   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
637   PetscErrorCode ierr;
638   PetscInt       nstash,reallocs;
639   InsertMode     addv;
640 
641   PetscFunctionBegin;
642   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
643 
644   /* make sure all processors are either in INSERTMODE or ADDMODE */
645   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
646   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
647   mat->insertmode = addv; /* in case this processor had no cache */
648 
649   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
650   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
651   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 
655 #undef __FUNCT__
656 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
657 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
658 {
659   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
660   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
661   PetscErrorCode ierr;
662   PetscMPIInt    n;
663   PetscInt       i,j,rstart,ncols,flg;
664   PetscInt       *row,*col;
665   PetscBool      other_disassembled;
666   PetscScalar    *val;
667   InsertMode     addv = mat->insertmode;
668 
669   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
670 
671   PetscFunctionBegin;
672   if (!aij->donotstash && !mat->nooffprocentries) {
673     while (1) {
674       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
675       if (!flg) break;
676 
677       for (i=0; i<n; ) {
678         /* Now identify the consecutive vals belonging to the same row */
679         for (j=i,rstart=row[j]; j<n; j++) {
680           if (row[j] != rstart) break;
681         }
682         if (j < n) ncols = j-i;
683         else       ncols = n-i;
684         /* Now assemble all these values with a single function call */
685         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
686 
687         i = j;
688       }
689     }
690     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
691   }
692   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
693   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
694 
695   /* determine if any processor has disassembled, if so we must
696      also disassemble ourselfs, in order that we may reassemble. */
697   /*
698      if nonzero structure of submatrix B cannot change then we know that
699      no processor disassembled thus we can skip this stuff
700   */
701   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
702     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
703     if (mat->was_assembled && !other_disassembled) {
704       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
705     }
706   }
707   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
708     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
709   }
710   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
711   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
712   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
713   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
714 
715   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
716 
717   aij->rowvalues = 0;
718 
719   /* used by MatAXPY() */
720   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
721   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
722 
723   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
724   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNCT__
729 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
730 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
731 {
732   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
733   PetscErrorCode ierr;
734 
735   PetscFunctionBegin;
736   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
737   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
738   PetscFunctionReturn(0);
739 }
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatZeroRows_MPIAIJ"
743 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
744 {
745   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
746   PetscErrorCode    ierr;
747   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
748   PetscInt          i,*owners = A->rmap->range;
749   PetscInt          *nprocs,j,idx,nsends,row;
750   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
751   PetscInt          *rvalues,count,base,slen,*source;
752   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
753   MPI_Comm          comm = ((PetscObject)A)->comm;
754   MPI_Request       *send_waits,*recv_waits;
755   MPI_Status        recv_status,*send_status;
756   const PetscScalar *xx;
757   PetscScalar       *bb;
758 #if defined(PETSC_DEBUG)
759   PetscBool found = PETSC_FALSE;
760 #endif
761 
762   PetscFunctionBegin;
763   /*  first count number of contributors to each processor */
764   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
765   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
766   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
767   j    = 0;
768   for (i=0; i<N; i++) {
769     if (lastidx > (idx = rows[i])) j = 0;
770     lastidx = idx;
771     for (; j<size; j++) {
772       if (idx >= owners[j] && idx < owners[j+1]) {
773         nprocs[2*j]++;
774         nprocs[2*j+1] = 1;
775         owner[i]      = j;
776 #if defined(PETSC_DEBUG)
777         found = PETSC_TRUE;
778 #endif
779         break;
780       }
781     }
782 #if defined(PETSC_DEBUG)
783     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
784     found = PETSC_FALSE;
785 #endif
786   }
787   nsends = 0;
788   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
789 
790   if (A->nooffproczerorows) {
791     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
792     nrecvs = nsends;
793     nmax   = N;
794   } else {
795     /* inform other processors of number of messages and max length*/
796     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
797   }
798 
799   /* post receives:   */
800   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
801   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
802   for (i=0; i<nrecvs; i++) {
803     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
804   }
805 
806   /* do sends:
807       1) starts[i] gives the starting index in svalues for stuff going to
808          the ith processor
809   */
810   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
811   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
812   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
813 
814   starts[0] = 0;
815   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
816   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
817 
818   starts[0] = 0;
819   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
820   count = 0;
821   for (i=0; i<size; i++) {
822     if (nprocs[2*i+1]) {
823       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
824     }
825   }
826   ierr = PetscFree(starts);CHKERRQ(ierr);
827 
828   base = owners[rank];
829 
830   /*  wait on receives */
831   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
832   count = nrecvs; slen = 0;
833   while (count) {
834     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
835     /* unpack receives into our local space */
836     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
837 
838     source[imdex] = recv_status.MPI_SOURCE;
839     lens[imdex]   = n;
840     slen         += n;
841     count--;
842   }
843   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
844 
845   /* move the data into the send scatter */
846   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
847   count = 0;
848   for (i=0; i<nrecvs; i++) {
849     values = rvalues + i*nmax;
850     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
851   }
852   ierr = PetscFree(rvalues);CHKERRQ(ierr);
853   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
854   ierr = PetscFree(owner);CHKERRQ(ierr);
855   ierr = PetscFree(nprocs);CHKERRQ(ierr);
856 
857   /* fix right hand side if needed */
858   if (x && b) {
859     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
860     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
861     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
862     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
864   }
865   /*
866         Zero the required rows. If the "diagonal block" of the matrix
867      is square and the user wishes to set the diagonal we use separate
868      code so that MatSetValues() is not called for each diagonal allocating
869      new memory, thus calling lots of mallocs and slowing things down.
870 
871   */
872   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
873   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
874   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
875     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
876   } else if (diag != 0.0) {
877     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
878     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
879     for (i = 0; i < slen; i++) {
880       row  = lrows[i] + rstart;
881       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
882     }
883     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
884     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
885   } else {
886     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
887   }
888   ierr = PetscFree(lrows);CHKERRQ(ierr);
889 
890   /* wait on sends */
891   if (nsends) {
892     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
893     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
894     ierr = PetscFree(send_status);CHKERRQ(ierr);
895   }
896   ierr = PetscFree(send_waits);CHKERRQ(ierr);
897   ierr = PetscFree(svalues);CHKERRQ(ierr);
898   PetscFunctionReturn(0);
899 }
900 
901 #undef __FUNCT__
902 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
903 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
904 {
905   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
906   PetscErrorCode    ierr;
907   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
908   PetscInt          i,*owners = A->rmap->range;
909   PetscInt          *nprocs,j,idx,nsends;
910   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
911   PetscInt          *rvalues,count,base,slen,*source;
912   PetscInt          *lens,*lrows,*values,m;
913   MPI_Comm          comm = ((PetscObject)A)->comm;
914   MPI_Request       *send_waits,*recv_waits;
915   MPI_Status        recv_status,*send_status;
916   const PetscScalar *xx;
917   PetscScalar       *bb,*mask;
918   Vec               xmask,lmask;
919   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
920   const PetscInt    *aj, *ii,*ridx;
921   PetscScalar       *aa;
922 #if defined(PETSC_DEBUG)
923   PetscBool         found = PETSC_FALSE;
924 #endif
925 
926   PetscFunctionBegin;
927   /*  first count number of contributors to each processor */
928   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
929   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
930   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
931   j    = 0;
932   for (i=0; i<N; i++) {
933     if (lastidx > (idx = rows[i])) j = 0;
934     lastidx = idx;
935     for (; j<size; j++) {
936       if (idx >= owners[j] && idx < owners[j+1]) {
937         nprocs[2*j]++;
938         nprocs[2*j+1] = 1;
939         owner[i]      = j;
940 #if defined(PETSC_DEBUG)
941         found = PETSC_TRUE;
942 #endif
943         break;
944       }
945     }
946 #if defined(PETSC_DEBUG)
947     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
948     found = PETSC_FALSE;
949 #endif
950   }
951   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
952 
953   /* inform other processors of number of messages and max length*/
954   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
955 
956   /* post receives:   */
957   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
958   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
959   for (i=0; i<nrecvs; i++) {
960     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
961   }
962 
963   /* do sends:
964       1) starts[i] gives the starting index in svalues for stuff going to
965          the ith processor
966   */
967   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
968   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
969   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
970 
971   starts[0] = 0;
972   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
973   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
974 
975   starts[0] = 0;
976   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
977   count = 0;
978   for (i=0; i<size; i++) {
979     if (nprocs[2*i+1]) {
980       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
981     }
982   }
983   ierr = PetscFree(starts);CHKERRQ(ierr);
984 
985   base = owners[rank];
986 
987   /*  wait on receives */
988   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
989   count = nrecvs; slen = 0;
990   while (count) {
991     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
992     /* unpack receives into our local space */
993     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
994 
995     source[imdex] = recv_status.MPI_SOURCE;
996     lens[imdex]   = n;
997     slen         += n;
998     count--;
999   }
1000   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1001 
1002   /* move the data into the send scatter */
1003   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1004   count = 0;
1005   for (i=0; i<nrecvs; i++) {
1006     values = rvalues + i*nmax;
1007     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1008   }
1009   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1010   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1011   ierr = PetscFree(owner);CHKERRQ(ierr);
1012   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1013   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1014 
1015   /* zero diagonal part of matrix */
1016   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1017 
1018   /* handle off diagonal part of matrix */
1019   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1020   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1021   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1022   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1023   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1024   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1025   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1027   if (x) {
1028     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1031     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1032   }
1033   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1034 
1035   /* remove zeroed rows of off diagonal matrix */
1036   ii = aij->i;
1037   for (i=0; i<slen; i++) {
1038     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1039   }
1040 
1041   /* loop over all elements of off process part of matrix zeroing removed columns*/
1042   if (aij->compressedrow.use) {
1043     m    = aij->compressedrow.nrows;
1044     ii   = aij->compressedrow.i;
1045     ridx = aij->compressedrow.rindex;
1046     for (i=0; i<m; i++) {
1047       n  = ii[i+1] - ii[i];
1048       aj = aij->j + ii[i];
1049       aa = aij->a + ii[i];
1050 
1051       for (j=0; j<n; j++) {
1052         if (PetscAbsScalar(mask[*aj])) {
1053           if (b) bb[*ridx] -= *aa*xx[*aj];
1054           *aa = 0.0;
1055         }
1056         aa++;
1057         aj++;
1058       }
1059       ridx++;
1060     }
1061   } else { /* do not use compressed row format */
1062     m = l->B->rmap->n;
1063     for (i=0; i<m; i++) {
1064       n  = ii[i+1] - ii[i];
1065       aj = aij->j + ii[i];
1066       aa = aij->a + ii[i];
1067       for (j=0; j<n; j++) {
1068         if (PetscAbsScalar(mask[*aj])) {
1069           if (b) bb[i] -= *aa*xx[*aj];
1070           *aa = 0.0;
1071         }
1072         aa++;
1073         aj++;
1074       }
1075     }
1076   }
1077   if (x) {
1078     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1079     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1080   }
1081   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1082   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1083   ierr = PetscFree(lrows);CHKERRQ(ierr);
1084 
1085   /* wait on sends */
1086   if (nsends) {
1087     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1088     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1089     ierr = PetscFree(send_status);CHKERRQ(ierr);
1090   }
1091   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1092   ierr = PetscFree(svalues);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatMult_MPIAIJ"
1098 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101   PetscErrorCode ierr;
1102   PetscInt       nt;
1103 
1104   PetscFunctionBegin;
1105   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1106   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1107   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1108   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1109   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1116 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1117 {
1118   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1119   PetscErrorCode ierr;
1120 
1121   PetscFunctionBegin;
1122   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1128 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1129 {
1130   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1131   PetscErrorCode ierr;
1132 
1133   PetscFunctionBegin;
1134   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1135   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1136   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1137   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1143 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1144 {
1145   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1146   PetscErrorCode ierr;
1147   PetscBool      merged;
1148 
1149   PetscFunctionBegin;
1150   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1151   /* do nondiagonal part */
1152   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1153   if (!merged) {
1154     /* send it on its way */
1155     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1156     /* do local part */
1157     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1158     /* receive remote parts: note this assumes the values are not actually */
1159     /* added in yy until the next line, */
1160     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1161   } else {
1162     /* do local part */
1163     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1164     /* send it on its way */
1165     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1166     /* values actually were received in the Begin() but we need to call this nop */
1167     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1168   }
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 EXTERN_C_BEGIN
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1175 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1176 {
1177   MPI_Comm       comm;
1178   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1179   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1180   IS             Me,Notme;
1181   PetscErrorCode ierr;
1182   PetscInt       M,N,first,last,*notme,i;
1183   PetscMPIInt    size;
1184 
1185   PetscFunctionBegin;
1186   /* Easy test: symmetric diagonal block */
1187   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1188   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1189   if (!*f) PetscFunctionReturn(0);
1190   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1191   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1192   if (size == 1) PetscFunctionReturn(0);
1193 
1194   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1195   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1196   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1197   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1198   for (i=0; i<first; i++) notme[i] = i;
1199   for (i=last; i<M; i++) notme[i-last+first] = i;
1200   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1201   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1202   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1203   Aoff = Aoffs[0];
1204   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1205   Boff = Boffs[0];
1206   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1207   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1208   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1209   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1210   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1211   ierr = PetscFree(notme);CHKERRQ(ierr);
1212   PetscFunctionReturn(0);
1213 }
1214 EXTERN_C_END
1215 
1216 #undef __FUNCT__
1217 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1218 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1219 {
1220   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1221   PetscErrorCode ierr;
1222 
1223   PetscFunctionBegin;
1224   /* do nondiagonal part */
1225   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1226   /* send it on its way */
1227   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1228   /* do local part */
1229   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1230   /* receive remote parts */
1231   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 /*
1236   This only works correctly for square matrices where the subblock A->A is the
1237    diagonal block
1238 */
1239 #undef __FUNCT__
1240 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1241 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1242 {
1243   PetscErrorCode ierr;
1244   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1245 
1246   PetscFunctionBegin;
1247   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1248   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1249   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatScale_MPIAIJ"
1255 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1256 {
1257   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1262   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatDestroy_MPIAIJ"
1268 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1269 {
1270   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274 #if defined(PETSC_USE_LOG)
1275   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1276 #endif
1277   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1278   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1279   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1280   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1281 #if defined(PETSC_USE_CTABLE)
1282   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1283 #else
1284   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1285 #endif
1286   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1287   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1288   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1289   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1290   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1291   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1292 
1293   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1307 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1308 {
1309   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1310   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1311   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1312   PetscErrorCode ierr;
1313   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1314   int            fd;
1315   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1316   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1317   PetscScalar    *column_values;
1318   PetscInt       message_count,flowcontrolcount;
1319   FILE           *file;
1320 
1321   PetscFunctionBegin;
1322   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1323   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1324   nz   = A->nz + B->nz;
1325   if (!rank) {
1326     header[0] = MAT_FILE_CLASSID;
1327     header[1] = mat->rmap->N;
1328     header[2] = mat->cmap->N;
1329 
1330     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1331     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1332     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1333     /* get largest number of rows any processor has */
1334     rlen  = mat->rmap->n;
1335     range = mat->rmap->range;
1336     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1337   } else {
1338     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1339     rlen = mat->rmap->n;
1340   }
1341 
1342   /* load up the local row counts */
1343   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1344   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1345 
1346   /* store the row lengths to the file */
1347   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1348   if (!rank) {
1349     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1350     for (i=1; i<size; i++) {
1351       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1352       rlen = range[i+1] - range[i];
1353       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1354       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1355     }
1356     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1357   } else {
1358     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1359     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1360     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1361   }
1362   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1363 
1364   /* load up the local column indices */
1365   nzmax = nz; /* th processor needs space a largest processor needs */
1366   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1367   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1368   cnt   = 0;
1369   for (i=0; i<mat->rmap->n; i++) {
1370     for (j=B->i[i]; j<B->i[i+1]; j++) {
1371       if ((col = garray[B->j[j]]) > cstart) break;
1372       column_indices[cnt++] = col;
1373     }
1374     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1375     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1376   }
1377   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1378 
1379   /* store the column indices to the file */
1380   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1381   if (!rank) {
1382     MPI_Status status;
1383     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1384     for (i=1; i<size; i++) {
1385       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1386       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1387       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1388       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1389       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1390     }
1391     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1392   } else {
1393     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1394     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1395     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1396     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1397   }
1398   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1399 
1400   /* load up the local column values */
1401   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1402   cnt  = 0;
1403   for (i=0; i<mat->rmap->n; i++) {
1404     for (j=B->i[i]; j<B->i[i+1]; j++) {
1405       if (garray[B->j[j]] > cstart) break;
1406       column_values[cnt++] = B->a[j];
1407     }
1408     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1409     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1410   }
1411   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1412 
1413   /* store the column values to the file */
1414   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1415   if (!rank) {
1416     MPI_Status status;
1417     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1418     for (i=1; i<size; i++) {
1419       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1420       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1421       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1422       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1423       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1424     }
1425     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1426   } else {
1427     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1428     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1429     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1430     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1431   }
1432   ierr = PetscFree(column_values);CHKERRQ(ierr);
1433 
1434   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1435   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1441 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1442 {
1443   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1444   PetscErrorCode    ierr;
1445   PetscMPIInt       rank = aij->rank,size = aij->size;
1446   PetscBool         isdraw,iascii,isbinary;
1447   PetscViewer       sviewer;
1448   PetscViewerFormat format;
1449 
1450   PetscFunctionBegin;
1451   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1452   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1453   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1454   if (iascii) {
1455     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1456     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1457       MatInfo   info;
1458       PetscBool inodes;
1459 
1460       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1461       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1462       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
1463       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1464       if (!inodes) {
1465         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1466                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1467       } else {
1468         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1469                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1470       }
1471       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1472       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1473       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1474       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1475       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1476       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1478       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1479       PetscFunctionReturn(0);
1480     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1481       PetscInt inodecount,inodelimit,*inodes;
1482       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1483       if (inodes) {
1484         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1485       } else {
1486         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1487       }
1488       PetscFunctionReturn(0);
1489     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1490       PetscFunctionReturn(0);
1491     }
1492   } else if (isbinary) {
1493     if (size == 1) {
1494       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1495       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1496     } else {
1497       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1498     }
1499     PetscFunctionReturn(0);
1500   } else if (isdraw) {
1501     PetscDraw draw;
1502     PetscBool isnull;
1503     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1504     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1505   }
1506 
1507   if (size == 1) {
1508     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1509     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1510   } else {
1511     /* assemble the entire matrix onto first processor. */
1512     Mat        A;
1513     Mat_SeqAIJ *Aloc;
1514     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1515     MatScalar  *a;
1516 
1517     if (mat->rmap->N > 1024) {
1518       PetscBool flg = PETSC_FALSE;
1519 
1520       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1521       if (!flg) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1522     }
1523 
1524     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1525     if (!rank) {
1526       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1527     } else {
1528       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1529     }
1530     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1531     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1532     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1533     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1534     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1535 
1536     /* copy over the A part */
1537     Aloc = (Mat_SeqAIJ*)aij->A->data;
1538     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1539     row = mat->rmap->rstart;
1540     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1541     for (i=0; i<m; i++) {
1542       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1543       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1544     }
1545     aj = Aloc->j;
1546     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1547 
1548     /* copy over the B part */
1549     Aloc = (Mat_SeqAIJ*)aij->B->data;
1550     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1551     row  = mat->rmap->rstart;
1552     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1553     ct   = cols;
1554     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1555     for (i=0; i<m; i++) {
1556       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1557       row++;
1558       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1559     }
1560     ierr = PetscFree(ct);CHKERRQ(ierr);
1561     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1562     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1563     /*
1564        Everyone has to call to draw the matrix since the graphics waits are
1565        synchronized across all processors that share the PetscDraw object
1566     */
1567     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1568     if (!rank) {
1569       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1570       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1571       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1572       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1573     }
1574     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1575     ierr = MatDestroy(&A);CHKERRQ(ierr);
1576   }
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatView_MPIAIJ"
1582 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1583 {
1584   PetscErrorCode ierr;
1585   PetscBool      iascii,isdraw,issocket,isbinary;
1586 
1587   PetscFunctionBegin;
1588   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1589   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1590   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1591   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1592   if (iascii || isdraw || isbinary || issocket) {
1593     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1594   }
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatSOR_MPIAIJ"
1600 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1601 {
1602   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1603   PetscErrorCode ierr;
1604   Vec            bb1 = 0;
1605   PetscBool      hasop;
1606 
1607   PetscFunctionBegin;
1608   if (flag == SOR_APPLY_UPPER) {
1609     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1610     PetscFunctionReturn(0);
1611   }
1612 
1613   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1614     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1615   }
1616 
1617   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1618     if (flag & SOR_ZERO_INITIAL_GUESS) {
1619       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1620       its--;
1621     }
1622 
1623     while (its--) {
1624       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1626 
1627       /* update rhs: bb1 = bb - B*x */
1628       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1629       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1630 
1631       /* local sweep */
1632       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1633     }
1634   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1635     if (flag & SOR_ZERO_INITIAL_GUESS) {
1636       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1637       its--;
1638     }
1639     while (its--) {
1640       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1641       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1642 
1643       /* update rhs: bb1 = bb - B*x */
1644       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1645       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1646 
1647       /* local sweep */
1648       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1649     }
1650   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1651     if (flag & SOR_ZERO_INITIAL_GUESS) {
1652       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1653       its--;
1654     }
1655     while (its--) {
1656       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1657       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1658 
1659       /* update rhs: bb1 = bb - B*x */
1660       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1661       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1662 
1663       /* local sweep */
1664       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1665     }
1666   } else if (flag & SOR_EISENSTAT) {
1667     Vec xx1;
1668 
1669     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1670     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1671 
1672     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1673     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1674     if (!mat->diag) {
1675       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1676       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1677     }
1678     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1679     if (hasop) {
1680       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1681     } else {
1682       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1683     }
1684     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1685 
1686     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1687 
1688     /* local sweep */
1689     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1690     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1691     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1692   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1693 
1694   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatPermute_MPIAIJ"
1700 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1701 {
1702   Mat            aA,aB,Aperm;
1703   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1704   PetscScalar    *aa,*ba;
1705   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1706   PetscSF        rowsf,sf;
1707   IS             parcolp = PETSC_NULL;
1708   PetscBool      done;
1709   PetscErrorCode ierr;
1710 
1711   PetscFunctionBegin;
1712   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1713   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1714   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1715   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1716 
1717   /* Invert row permutation to find out where my rows should go */
1718   ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr);
1719   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1720   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1721   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1722   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1723   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1724 
1725   /* Invert column permutation to find out where my columns should go */
1726   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1727   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1728   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1729   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1730   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1731   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1732   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1733 
1734   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1735   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1736   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1737 
1738   /* Find out where my gcols should go */
1739   ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr);
1740   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1741   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1742   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1743   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1744   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1745   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1746   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1747 
1748   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1749   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1750   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1751   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1752   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1753   for (i=0; i<m; i++) {
1754     PetscInt row = rdest[i],rowner;
1755     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1756     for (j=ai[i]; j<ai[i+1]; j++) {
1757       PetscInt cowner,col = cdest[aj[j]];
1758       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1759       if (rowner == cowner) dnnz[i]++;
1760       else onnz[i]++;
1761     }
1762     for (j=bi[i]; j<bi[i+1]; j++) {
1763       PetscInt cowner,col = gcdest[bj[j]];
1764       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1765       if (rowner == cowner) dnnz[i]++;
1766       else onnz[i]++;
1767     }
1768   }
1769   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1770   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1771   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1772   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1773   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1774   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1775   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1776 
1777   ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1778   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1779   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1780   for (i=0; i<m; i++) {
1781     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1782     PetscInt rowlen;
1783     rowlen = ai[i+1] - ai[i];
1784     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1785     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1786     rowlen = bi[i+1] - bi[i];
1787     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1788     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1789   }
1790   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1791   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1792   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1793   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1794   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1795   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1796   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1797   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1798   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1799   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1800   *B = Aperm;
1801   PetscFunctionReturn(0);
1802 }
1803 
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1806 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1807 {
1808   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1809   Mat            A    = mat->A,B = mat->B;
1810   PetscErrorCode ierr;
1811   PetscReal      isend[5],irecv[5];
1812 
1813   PetscFunctionBegin;
1814   info->block_size = 1.0;
1815   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1816 
1817   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1818   isend[3] = info->memory;  isend[4] = info->mallocs;
1819 
1820   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1821 
1822   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1823   isend[3] += info->memory;  isend[4] += info->mallocs;
1824   if (flag == MAT_LOCAL) {
1825     info->nz_used      = isend[0];
1826     info->nz_allocated = isend[1];
1827     info->nz_unneeded  = isend[2];
1828     info->memory       = isend[3];
1829     info->mallocs      = isend[4];
1830   } else if (flag == MAT_GLOBAL_MAX) {
1831     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1832 
1833     info->nz_used      = irecv[0];
1834     info->nz_allocated = irecv[1];
1835     info->nz_unneeded  = irecv[2];
1836     info->memory       = irecv[3];
1837     info->mallocs      = irecv[4];
1838   } else if (flag == MAT_GLOBAL_SUM) {
1839     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1840 
1841     info->nz_used      = irecv[0];
1842     info->nz_allocated = irecv[1];
1843     info->nz_unneeded  = irecv[2];
1844     info->memory       = irecv[3];
1845     info->mallocs      = irecv[4];
1846   }
1847   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1848   info->fill_ratio_needed = 0;
1849   info->factor_mallocs    = 0;
1850   PetscFunctionReturn(0);
1851 }
1852 
1853 #undef __FUNCT__
1854 #define __FUNCT__ "MatSetOption_MPIAIJ"
1855 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1856 {
1857   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1858   PetscErrorCode ierr;
1859 
1860   PetscFunctionBegin;
1861   switch (op) {
1862   case MAT_NEW_NONZERO_LOCATIONS:
1863   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1864   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1865   case MAT_KEEP_NONZERO_PATTERN:
1866   case MAT_NEW_NONZERO_LOCATION_ERR:
1867   case MAT_USE_INODES:
1868   case MAT_IGNORE_ZERO_ENTRIES:
1869     MatCheckPreallocated(A,1);
1870     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1871     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1872     break;
1873   case MAT_ROW_ORIENTED:
1874     a->roworiented = flg;
1875 
1876     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1877     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1878     break;
1879   case MAT_NEW_DIAGONALS:
1880     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1881     break;
1882   case MAT_IGNORE_OFF_PROC_ENTRIES:
1883     a->donotstash = flg;
1884     break;
1885   case MAT_SPD:
1886     A->spd_set = PETSC_TRUE;
1887     A->spd     = flg;
1888     if (flg) {
1889       A->symmetric                  = PETSC_TRUE;
1890       A->structurally_symmetric     = PETSC_TRUE;
1891       A->symmetric_set              = PETSC_TRUE;
1892       A->structurally_symmetric_set = PETSC_TRUE;
1893     }
1894     break;
1895   case MAT_SYMMETRIC:
1896     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1897     break;
1898   case MAT_STRUCTURALLY_SYMMETRIC:
1899     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1900     break;
1901   case MAT_HERMITIAN:
1902     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1903     break;
1904   case MAT_SYMMETRY_ETERNAL:
1905     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1906     break;
1907   default:
1908     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1909   }
1910   PetscFunctionReturn(0);
1911 }
1912 
1913 #undef __FUNCT__
1914 #define __FUNCT__ "MatGetRow_MPIAIJ"
1915 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1916 {
1917   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1918   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1919   PetscErrorCode ierr;
1920   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1921   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1922   PetscInt       *cmap,*idx_p;
1923 
1924   PetscFunctionBegin;
1925   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1926   mat->getrowactive = PETSC_TRUE;
1927 
1928   if (!mat->rowvalues && (idx || v)) {
1929     /*
1930         allocate enough space to hold information from the longest row.
1931     */
1932     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1933     PetscInt   max = 1,tmp;
1934     for (i=0; i<matin->rmap->n; i++) {
1935       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1936       if (max < tmp) max = tmp;
1937     }
1938     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1939   }
1940 
1941   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1942   lrow = row - rstart;
1943 
1944   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1945   if (!v)   {pvA = 0; pvB = 0;}
1946   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1947   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1948   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1949   nztot = nzA + nzB;
1950 
1951   cmap = mat->garray;
1952   if (v  || idx) {
1953     if (nztot) {
1954       /* Sort by increasing column numbers, assuming A and B already sorted */
1955       PetscInt imark = -1;
1956       if (v) {
1957         *v = v_p = mat->rowvalues;
1958         for (i=0; i<nzB; i++) {
1959           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1960           else break;
1961         }
1962         imark = i;
1963         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1964         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1965       }
1966       if (idx) {
1967         *idx = idx_p = mat->rowindices;
1968         if (imark > -1) {
1969           for (i=0; i<imark; i++) {
1970             idx_p[i] = cmap[cworkB[i]];
1971           }
1972         } else {
1973           for (i=0; i<nzB; i++) {
1974             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1975             else break;
1976           }
1977           imark = i;
1978         }
1979         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1980         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1981       }
1982     } else {
1983       if (idx) *idx = 0;
1984       if (v)   *v   = 0;
1985     }
1986   }
1987   *nz  = nztot;
1988   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1989   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1990   PetscFunctionReturn(0);
1991 }
1992 
1993 #undef __FUNCT__
1994 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1995 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1996 {
1997   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1998 
1999   PetscFunctionBegin;
2000   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2001   aij->getrowactive = PETSC_FALSE;
2002   PetscFunctionReturn(0);
2003 }
2004 
2005 #undef __FUNCT__
2006 #define __FUNCT__ "MatNorm_MPIAIJ"
2007 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2008 {
2009   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2010   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2011   PetscErrorCode ierr;
2012   PetscInt       i,j,cstart = mat->cmap->rstart;
2013   PetscReal      sum = 0.0;
2014   MatScalar      *v;
2015 
2016   PetscFunctionBegin;
2017   if (aij->size == 1) {
2018     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2019   } else {
2020     if (type == NORM_FROBENIUS) {
2021       v = amat->a;
2022       for (i=0; i<amat->nz; i++) {
2023         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2024       }
2025       v = bmat->a;
2026       for (i=0; i<bmat->nz; i++) {
2027         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2028       }
2029       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2030       *norm = PetscSqrtReal(*norm);
2031     } else if (type == NORM_1) { /* max column norm */
2032       PetscReal *tmp,*tmp2;
2033       PetscInt  *jj,*garray = aij->garray;
2034       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2035       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2036       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2037       *norm = 0.0;
2038       v     = amat->a; jj = amat->j;
2039       for (j=0; j<amat->nz; j++) {
2040         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2041       }
2042       v = bmat->a; jj = bmat->j;
2043       for (j=0; j<bmat->nz; j++) {
2044         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2045       }
2046       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2047       for (j=0; j<mat->cmap->N; j++) {
2048         if (tmp2[j] > *norm) *norm = tmp2[j];
2049       }
2050       ierr = PetscFree(tmp);CHKERRQ(ierr);
2051       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2052     } else if (type == NORM_INFINITY) { /* max row norm */
2053       PetscReal ntemp = 0.0;
2054       for (j=0; j<aij->A->rmap->n; j++) {
2055         v   = amat->a + amat->i[j];
2056         sum = 0.0;
2057         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2058           sum += PetscAbsScalar(*v); v++;
2059         }
2060         v = bmat->a + bmat->i[j];
2061         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2062           sum += PetscAbsScalar(*v); v++;
2063         }
2064         if (sum > ntemp) ntemp = sum;
2065       }
2066       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2067     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2068   }
2069   PetscFunctionReturn(0);
2070 }
2071 
2072 #undef __FUNCT__
2073 #define __FUNCT__ "MatTranspose_MPIAIJ"
2074 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2075 {
2076   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2077   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2078   PetscErrorCode ierr;
2079   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2080   PetscInt       cstart = A->cmap->rstart,ncol;
2081   Mat            B;
2082   MatScalar      *array;
2083 
2084   PetscFunctionBegin;
2085   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2086 
2087   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2088   ai = Aloc->i; aj = Aloc->j;
2089   bi = Bloc->i; bj = Bloc->j;
2090   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2091     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2092     PetscSFNode          *oloc;
2093     PETSC_UNUSED PetscSF sf;
2094 
2095     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2096     /* compute d_nnz for preallocation */
2097     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2098     for (i=0; i<ai[ma]; i++) {
2099       d_nnz[aj[i]]++;
2100       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2101     }
2102     /* compute local off-diagonal contributions */
2103     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2104     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2105     /* map those to global */
2106     ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
2107     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2108     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2109     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2110     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2111     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2112     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2113 
2114     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2115     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2116     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2117     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2118     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2119     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2120   } else {
2121     B    = *matout;
2122     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2123     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2124   }
2125 
2126   /* copy over the A part */
2127   array = Aloc->a;
2128   row   = A->rmap->rstart;
2129   for (i=0; i<ma; i++) {
2130     ncol = ai[i+1]-ai[i];
2131     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2132     row++;
2133     array += ncol; aj += ncol;
2134   }
2135   aj = Aloc->j;
2136   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2137 
2138   /* copy over the B part */
2139   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2140   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2141   array = Bloc->a;
2142   row   = A->rmap->rstart;
2143   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2144   cols_tmp = cols;
2145   for (i=0; i<mb; i++) {
2146     ncol = bi[i+1]-bi[i];
2147     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2148     row++;
2149     array += ncol; cols_tmp += ncol;
2150   }
2151   ierr = PetscFree(cols);CHKERRQ(ierr);
2152 
2153   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2154   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2155   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2156     *matout = B;
2157   } else {
2158     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2159   }
2160   PetscFunctionReturn(0);
2161 }
2162 
2163 #undef __FUNCT__
2164 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2165 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2166 {
2167   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2168   Mat            a    = aij->A,b = aij->B;
2169   PetscErrorCode ierr;
2170   PetscInt       s1,s2,s3;
2171 
2172   PetscFunctionBegin;
2173   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2174   if (rr) {
2175     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2176     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2177     /* Overlap communication with computation. */
2178     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2179   }
2180   if (ll) {
2181     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2182     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2183     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2184   }
2185   /* scale  the diagonal block */
2186   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2187 
2188   if (rr) {
2189     /* Do a scatter end and then right scale the off-diagonal block */
2190     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2191     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2192   }
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 #undef __FUNCT__
2197 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2198 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2199 {
2200   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2201   PetscErrorCode ierr;
2202 
2203   PetscFunctionBegin;
2204   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2205   PetscFunctionReturn(0);
2206 }
2207 
2208 #undef __FUNCT__
2209 #define __FUNCT__ "MatEqual_MPIAIJ"
2210 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2211 {
2212   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2213   Mat            a,b,c,d;
2214   PetscBool      flg;
2215   PetscErrorCode ierr;
2216 
2217   PetscFunctionBegin;
2218   a = matA->A; b = matA->B;
2219   c = matB->A; d = matB->B;
2220 
2221   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2222   if (flg) {
2223     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2224   }
2225   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2226   PetscFunctionReturn(0);
2227 }
2228 
2229 #undef __FUNCT__
2230 #define __FUNCT__ "MatCopy_MPIAIJ"
2231 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2232 {
2233   PetscErrorCode ierr;
2234   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2235   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2236 
2237   PetscFunctionBegin;
2238   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2239   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2240     /* because of the column compression in the off-processor part of the matrix a->B,
2241        the number of columns in a->B and b->B may be different, hence we cannot call
2242        the MatCopy() directly on the two parts. If need be, we can provide a more
2243        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2244        then copying the submatrices */
2245     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2246   } else {
2247     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2248     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2249   }
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 #undef __FUNCT__
2254 #define __FUNCT__ "MatSetUp_MPIAIJ"
2255 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2256 {
2257   PetscErrorCode ierr;
2258 
2259   PetscFunctionBegin;
2260   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2266 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2267 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2268 {
2269   PetscInt       i,m=Y->rmap->N;
2270   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2271   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2272   const PetscInt *xi = x->i,*yi = y->i;
2273 
2274   PetscFunctionBegin;
2275   /* Set the number of nonzeros in the new matrix */
2276   for (i=0; i<m; i++) {
2277     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2278     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2279     nnz[i] = 0;
2280     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2281       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2282       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2283       nnz[i]++;
2284     }
2285     for (; k<nzy; k++) nnz[i]++;
2286   }
2287   PetscFunctionReturn(0);
2288 }
2289 
2290 #undef __FUNCT__
2291 #define __FUNCT__ "MatAXPY_MPIAIJ"
2292 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2293 {
2294   PetscErrorCode ierr;
2295   PetscInt       i;
2296   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2297   PetscBLASInt   bnz,one=1;
2298   Mat_SeqAIJ     *x,*y;
2299 
2300   PetscFunctionBegin;
2301   if (str == SAME_NONZERO_PATTERN) {
2302     PetscScalar alpha = a;
2303     x    = (Mat_SeqAIJ*)xx->A->data;
2304     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2305     y    = (Mat_SeqAIJ*)yy->A->data;
2306     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2307     x    = (Mat_SeqAIJ*)xx->B->data;
2308     y    = (Mat_SeqAIJ*)yy->B->data;
2309     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2310     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2311   } else if (str == SUBSET_NONZERO_PATTERN) {
2312     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2313 
2314     x = (Mat_SeqAIJ*)xx->B->data;
2315     y = (Mat_SeqAIJ*)yy->B->data;
2316     if (y->xtoy && y->XtoY != xx->B) {
2317       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2318       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2319     }
2320     if (!y->xtoy) { /* get xtoy */
2321       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2322       y->XtoY = xx->B;
2323       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2324     }
2325     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2326   } else {
2327     Mat      B;
2328     PetscInt *nnz_d,*nnz_o;
2329     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2330     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2331     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2332     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2333     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2334     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2335     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2336     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2337     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2338     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2339     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2340     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2341     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2342     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2343   }
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatConjugate_MPIAIJ"
2351 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2352 {
2353 #if defined(PETSC_USE_COMPLEX)
2354   PetscErrorCode ierr;
2355   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2356 
2357   PetscFunctionBegin;
2358   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2359   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2360 #else
2361   PetscFunctionBegin;
2362 #endif
2363   PetscFunctionReturn(0);
2364 }
2365 
2366 #undef __FUNCT__
2367 #define __FUNCT__ "MatRealPart_MPIAIJ"
2368 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2369 {
2370   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2371   PetscErrorCode ierr;
2372 
2373   PetscFunctionBegin;
2374   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2375   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 #undef __FUNCT__
2380 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2381 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2382 {
2383   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2384   PetscErrorCode ierr;
2385 
2386   PetscFunctionBegin;
2387   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2388   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2389   PetscFunctionReturn(0);
2390 }
2391 
2392 #if defined(PETSC_HAVE_PBGL)
2393 
2394 #include <boost/parallel/mpi/bsp_process_group.hpp>
2395 #include <boost/graph/distributed/ilu_default_graph.hpp>
2396 #include <boost/graph/distributed/ilu_0_block.hpp>
2397 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2398 #include <boost/graph/distributed/petsc/interface.hpp>
2399 #include <boost/multi_array.hpp>
2400 #include <boost/parallel/distributed_property_map->hpp>
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2404 /*
2405   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2406 */
2407 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2408 {
2409   namespace petsc = boost::distributed::petsc;
2410 
2411   namespace graph_dist = boost::graph::distributed;
2412   using boost::graph::distributed::ilu_default::process_group_type;
2413   using boost::graph::ilu_permuted;
2414 
2415   PetscBool      row_identity, col_identity;
2416   PetscContainer c;
2417   PetscInt       m, n, M, N;
2418   PetscErrorCode ierr;
2419 
2420   PetscFunctionBegin;
2421   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2422   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2423   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2424   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2425 
2426   process_group_type pg;
2427   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2428   lgraph_type  *lgraph_p  = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2429   lgraph_type& level_graph = *lgraph_p;
2430   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2431 
2432   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2433   ilu_permuted(level_graph);
2434 
2435   /* put together the new matrix */
2436   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2437   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2438   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2439   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2440   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2441   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2442   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2443   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2444 
2445   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
2446   ierr = PetscContainerSetPointer(c, lgraph_p);
2447   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2448   ierr = PetscContainerDestroy(&c);
2449   PetscFunctionReturn(0);
2450 }
2451 
2452 #undef __FUNCT__
2453 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2454 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2455 {
2456   PetscFunctionBegin;
2457   PetscFunctionReturn(0);
2458 }
2459 
2460 #undef __FUNCT__
2461 #define __FUNCT__ "MatSolve_MPIAIJ"
2462 /*
2463   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2464 */
2465 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2466 {
2467   namespace graph_dist = boost::graph::distributed;
2468 
2469   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2470   lgraph_type    *lgraph_p;
2471   PetscContainer c;
2472   PetscErrorCode ierr;
2473 
2474   PetscFunctionBegin;
2475   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2476   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2477   ierr = VecCopy(b, x);CHKERRQ(ierr);
2478 
2479   PetscScalar *array_x;
2480   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2481   PetscInt sx;
2482   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2483 
2484   PetscScalar *array_b;
2485   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2486   PetscInt sb;
2487   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2488 
2489   lgraph_type& level_graph = *lgraph_p;
2490   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2491 
2492   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2493   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2494   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2495 
2496   typedef boost::iterator_property_map<array_ref_type::iterator,
2497                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2498   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2499   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2500 
2501   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2502   PetscFunctionReturn(0);
2503 }
2504 #endif
2505 
2506 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2507   PetscInt    nzlocal,nsends,nrecvs;
2508   PetscMPIInt *send_rank,*recv_rank;
2509   PetscInt    *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2510   PetscScalar *sbuf_a,**rbuf_a;
2511   PetscErrorCode (*Destroy)(Mat);
2512 } Mat_Redundant;
2513 
2514 #undef __FUNCT__
2515 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2516 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2517 {
2518   PetscErrorCode ierr;
2519   Mat_Redundant  *redund=(Mat_Redundant*)ptr;
2520   PetscInt       i;
2521 
2522   PetscFunctionBegin;
2523   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2524   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2525   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2526   for (i=0; i<redund->nrecvs; i++) {
2527     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2528     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2529   }
2530   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2531   ierr = PetscFree(redund);CHKERRQ(ierr);
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 #undef __FUNCT__
2536 #define __FUNCT__ "MatDestroy_MatRedundant"
2537 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2538 {
2539   PetscErrorCode ierr;
2540   PetscContainer container;
2541   Mat_Redundant  *redund=PETSC_NULL;
2542 
2543   PetscFunctionBegin;
2544   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2545   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2546   ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2547 
2548   A->ops->destroy = redund->Destroy;
2549 
2550   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2551   if (A->ops->destroy) {
2552     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2553   }
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2559 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2560 {
2561   PetscMPIInt    rank,size;
2562   MPI_Comm       comm=((PetscObject)mat)->comm;
2563   PetscErrorCode ierr;
2564   PetscInt       nsends    = 0,nrecvs=0,i,rownz_max=0;
2565   PetscMPIInt    *send_rank= PETSC_NULL,*recv_rank=PETSC_NULL;
2566   PetscInt       *rowrange = mat->rmap->range;
2567   Mat_MPIAIJ     *aij      = (Mat_MPIAIJ*)mat->data;
2568   Mat            A         = aij->A,B=aij->B,C=*matredundant;
2569   Mat_SeqAIJ     *a        = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2570   PetscScalar    *sbuf_a;
2571   PetscInt       nzlocal=a->nz+b->nz;
2572   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2573   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2574   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2575   MatScalar      *aworkA,*aworkB;
2576   PetscScalar    *vals;
2577   PetscMPIInt    tag1,tag2,tag3,imdex;
2578   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL;
2579   MPI_Request    *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2580   MPI_Status     recv_status,*send_status;
2581   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2582   PetscInt       **rbuf_j=PETSC_NULL;
2583   PetscScalar    **rbuf_a=PETSC_NULL;
2584   Mat_Redundant  *redund =PETSC_NULL;
2585   PetscContainer container;
2586 
2587   PetscFunctionBegin;
2588   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2589   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2590 
2591   if (reuse == MAT_REUSE_MATRIX) {
2592     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2593     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2594     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2595     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2596     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2597     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2598     ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2599     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2600 
2601     nsends    = redund->nsends;
2602     nrecvs    = redund->nrecvs;
2603     send_rank = redund->send_rank;
2604     recv_rank = redund->recv_rank;
2605     sbuf_nz   = redund->sbuf_nz;
2606     rbuf_nz   = redund->rbuf_nz;
2607     sbuf_j    = redund->sbuf_j;
2608     sbuf_a    = redund->sbuf_a;
2609     rbuf_j    = redund->rbuf_j;
2610     rbuf_a    = redund->rbuf_a;
2611   }
2612 
2613   if (reuse == MAT_INITIAL_MATRIX) {
2614     PetscMPIInt subrank,subsize;
2615     PetscInt    nleftover,np_subcomm;
2616     /* get the destination processors' id send_rank, nsends and nrecvs */
2617     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2618     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2619     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2620 
2621     np_subcomm = size/nsubcomm;
2622     nleftover  = size - nsubcomm*np_subcomm;
2623 
2624     nsends = 0; nrecvs = 0;
2625     for (i=0; i<size; i++) { /* i=rank*/
2626       if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */
2627         send_rank[nsends]   = i; nsends++;
2628         recv_rank[nrecvs++] = i;
2629       }
2630     }
2631     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2632       i = size-nleftover-1;
2633       j = 0;
2634       while (j < nsubcomm - nleftover) {
2635         send_rank[nsends++] = i;
2636         i--; j++;
2637       }
2638     }
2639 
2640     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2641       for (i=0; i<nleftover; i++) {
2642         recv_rank[nrecvs++] = size-nleftover+i;
2643       }
2644     }
2645 
2646     /* allocate sbuf_j, sbuf_a */
2647     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2648     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2649     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2650   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2651 
2652   /* copy mat's local entries into the buffers */
2653   if (reuse == MAT_INITIAL_MATRIX) {
2654     rownz_max = 0;
2655     rptr      = sbuf_j;
2656     cols      = sbuf_j + rend-rstart + 1;
2657     vals      = sbuf_a;
2658     rptr[0]   = 0;
2659     for (i=0; i<rend-rstart; i++) {
2660       row    = i + rstart;
2661       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2662       ncols  = nzA + nzB;
2663       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2664       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2665       /* load the column indices for this row into cols */
2666       lwrite = 0;
2667       for (l=0; l<nzB; l++) {
2668         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2669           vals[lwrite]   = aworkB[l];
2670           cols[lwrite++] = ctmp;
2671         }
2672       }
2673       for (l=0; l<nzA; l++) {
2674         vals[lwrite]   = aworkA[l];
2675         cols[lwrite++] = cstart + cworkA[l];
2676       }
2677       for (l=0; l<nzB; l++) {
2678         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2679           vals[lwrite]   = aworkB[l];
2680           cols[lwrite++] = ctmp;
2681         }
2682       }
2683       vals     += ncols;
2684       cols     += ncols;
2685       rptr[i+1] = rptr[i] + ncols;
2686       if (rownz_max < ncols) rownz_max = ncols;
2687     }
2688     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2689   } else { /* only copy matrix values into sbuf_a */
2690     rptr    = sbuf_j;
2691     vals    = sbuf_a;
2692     rptr[0] = 0;
2693     for (i=0; i<rend-rstart; i++) {
2694       row    = i + rstart;
2695       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2696       ncols  = nzA + nzB;
2697       cworkB = b->j + b->i[i];
2698       aworkA = a->a + a->i[i];
2699       aworkB = b->a + b->i[i];
2700       lwrite = 0;
2701       for (l=0; l<nzB; l++) {
2702         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2703       }
2704       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2705       for (l=0; l<nzB; l++) {
2706         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2707       }
2708       vals     += ncols;
2709       rptr[i+1] = rptr[i] + ncols;
2710     }
2711   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2712 
2713   /* send nzlocal to others, and recv other's nzlocal */
2714   /*--------------------------------------------------*/
2715   if (reuse == MAT_INITIAL_MATRIX) {
2716     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2717 
2718     s_waits2 = s_waits3 + nsends;
2719     s_waits1 = s_waits2 + nsends;
2720     r_waits1 = s_waits1 + nsends;
2721     r_waits2 = r_waits1 + nrecvs;
2722     r_waits3 = r_waits2 + nrecvs;
2723   } else {
2724     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2725 
2726     r_waits3 = s_waits3 + nsends;
2727   }
2728 
2729   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2730   if (reuse == MAT_INITIAL_MATRIX) {
2731     /* get new tags to keep the communication clean */
2732     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2733     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2734     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2735 
2736     /* post receives of other's nzlocal */
2737     for (i=0; i<nrecvs; i++) {
2738       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2739     }
2740     /* send nzlocal to others */
2741     for (i=0; i<nsends; i++) {
2742       sbuf_nz[i] = nzlocal;
2743       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2744     }
2745     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2746     count = nrecvs;
2747     while (count) {
2748       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2749       recv_rank[imdex] = recv_status.MPI_SOURCE;
2750       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2751       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2752 
2753       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2754 
2755       rbuf_nz[imdex] += i + 2;
2756 
2757       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2758       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2759       count--;
2760     }
2761     /* wait on sends of nzlocal */
2762     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2763     /* send mat->i,j to others, and recv from other's */
2764     /*------------------------------------------------*/
2765     for (i=0; i<nsends; i++) {
2766       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2767       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2768     }
2769     /* wait on receives of mat->i,j */
2770     /*------------------------------*/
2771     count = nrecvs;
2772     while (count) {
2773       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2774       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2775       count--;
2776     }
2777     /* wait on sends of mat->i,j */
2778     /*---------------------------*/
2779     if (nsends) {
2780       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2781     }
2782   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2783 
2784   /* post receives, send and receive mat->a */
2785   /*----------------------------------------*/
2786   for (imdex=0; imdex<nrecvs; imdex++) {
2787     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2788   }
2789   for (i=0; i<nsends; i++) {
2790     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2791   }
2792   count = nrecvs;
2793   while (count) {
2794     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2795     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2796     count--;
2797   }
2798   if (nsends) {
2799     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2800   }
2801 
2802   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2803 
2804   /* create redundant matrix */
2805   /*-------------------------*/
2806   if (reuse == MAT_INITIAL_MATRIX) {
2807     /* compute rownz_max for preallocation */
2808     for (imdex=0; imdex<nrecvs; imdex++) {
2809       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2810       rptr = rbuf_j[imdex];
2811       for (i=0; i<j; i++) {
2812         ncols = rptr[i+1] - rptr[i];
2813         if (rownz_max < ncols) rownz_max = ncols;
2814       }
2815     }
2816 
2817     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2818     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2819     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2820     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2821     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2822     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2823   } else {
2824     C = *matredundant;
2825   }
2826 
2827   /* insert local matrix entries */
2828   rptr = sbuf_j;
2829   cols = sbuf_j + rend-rstart + 1;
2830   vals = sbuf_a;
2831   for (i=0; i<rend-rstart; i++) {
2832     row   = i + rstart;
2833     ncols = rptr[i+1] - rptr[i];
2834     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2835     vals += ncols;
2836     cols += ncols;
2837   }
2838   /* insert received matrix entries */
2839   for (imdex=0; imdex<nrecvs; imdex++) {
2840     rstart = rowrange[recv_rank[imdex]];
2841     rend   = rowrange[recv_rank[imdex]+1];
2842     rptr   = rbuf_j[imdex];
2843     cols   = rbuf_j[imdex] + rend-rstart + 1;
2844     vals   = rbuf_a[imdex];
2845     for (i=0; i<rend-rstart; i++) {
2846       row   = i + rstart;
2847       ncols = rptr[i+1] - rptr[i];
2848       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2849       vals += ncols;
2850       cols += ncols;
2851     }
2852   }
2853   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2854   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2855   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2856   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2857   if (reuse == MAT_INITIAL_MATRIX) {
2858     PetscContainer container;
2859     *matredundant = C;
2860     /* create a supporting struct and attach it to C for reuse */
2861     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2862     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2863     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2864     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2865     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2866     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2867 
2868     redund->nzlocal   = nzlocal;
2869     redund->nsends    = nsends;
2870     redund->nrecvs    = nrecvs;
2871     redund->send_rank = send_rank;
2872     redund->recv_rank = recv_rank;
2873     redund->sbuf_nz   = sbuf_nz;
2874     redund->rbuf_nz   = rbuf_nz;
2875     redund->sbuf_j    = sbuf_j;
2876     redund->sbuf_a    = sbuf_a;
2877     redund->rbuf_j    = rbuf_j;
2878     redund->rbuf_a    = rbuf_a;
2879 
2880     redund->Destroy = C->ops->destroy;
2881     C->ops->destroy = MatDestroy_MatRedundant;
2882   }
2883   PetscFunctionReturn(0);
2884 }
2885 
2886 #undef __FUNCT__
2887 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2888 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2889 {
2890   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2891   PetscErrorCode ierr;
2892   PetscInt       i,*idxb = 0;
2893   PetscScalar    *va,*vb;
2894   Vec            vtmp;
2895 
2896   PetscFunctionBegin;
2897   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2898   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2899   if (idx) {
2900     for (i=0; i<A->rmap->n; i++) {
2901       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2902     }
2903   }
2904 
2905   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2906   if (idx) {
2907     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2908   }
2909   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2910   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2911 
2912   for (i=0; i<A->rmap->n; i++) {
2913     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2914       va[i] = vb[i];
2915       if (idx) idx[i] = a->garray[idxb[i]];
2916     }
2917   }
2918 
2919   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2920   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2921   ierr = PetscFree(idxb);CHKERRQ(ierr);
2922   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2923   PetscFunctionReturn(0);
2924 }
2925 
2926 #undef __FUNCT__
2927 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2928 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2929 {
2930   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2931   PetscErrorCode ierr;
2932   PetscInt       i,*idxb = 0;
2933   PetscScalar    *va,*vb;
2934   Vec            vtmp;
2935 
2936   PetscFunctionBegin;
2937   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2938   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2939   if (idx) {
2940     for (i=0; i<A->cmap->n; i++) {
2941       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2942     }
2943   }
2944 
2945   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2946   if (idx) {
2947     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2948   }
2949   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2950   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2951 
2952   for (i=0; i<A->rmap->n; i++) {
2953     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2954       va[i] = vb[i];
2955       if (idx) idx[i] = a->garray[idxb[i]];
2956     }
2957   }
2958 
2959   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2960   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2961   ierr = PetscFree(idxb);CHKERRQ(ierr);
2962   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2963   PetscFunctionReturn(0);
2964 }
2965 
2966 #undef __FUNCT__
2967 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2968 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2969 {
2970   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2971   PetscInt       n      = A->rmap->n;
2972   PetscInt       cstart = A->cmap->rstart;
2973   PetscInt       *cmap  = mat->garray;
2974   PetscInt       *diagIdx, *offdiagIdx;
2975   Vec            diagV, offdiagV;
2976   PetscScalar    *a, *diagA, *offdiagA;
2977   PetscInt       r;
2978   PetscErrorCode ierr;
2979 
2980   PetscFunctionBegin;
2981   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2982   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2983   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2984   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2985   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2986   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2987   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2988   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2989   for (r = 0; r < n; ++r) {
2990     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2991       a[r]   = diagA[r];
2992       idx[r] = cstart + diagIdx[r];
2993     } else {
2994       a[r]   = offdiagA[r];
2995       idx[r] = cmap[offdiagIdx[r]];
2996     }
2997   }
2998   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2999   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3000   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3001   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3002   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3003   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3004   PetscFunctionReturn(0);
3005 }
3006 
3007 #undef __FUNCT__
3008 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3009 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3010 {
3011   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3012   PetscInt       n      = A->rmap->n;
3013   PetscInt       cstart = A->cmap->rstart;
3014   PetscInt       *cmap  = mat->garray;
3015   PetscInt       *diagIdx, *offdiagIdx;
3016   Vec            diagV, offdiagV;
3017   PetscScalar    *a, *diagA, *offdiagA;
3018   PetscInt       r;
3019   PetscErrorCode ierr;
3020 
3021   PetscFunctionBegin;
3022   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3023   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3024   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3025   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3026   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3027   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3028   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3029   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3030   for (r = 0; r < n; ++r) {
3031     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3032       a[r]   = diagA[r];
3033       idx[r] = cstart + diagIdx[r];
3034     } else {
3035       a[r]   = offdiagA[r];
3036       idx[r] = cmap[offdiagIdx[r]];
3037     }
3038   }
3039   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3040   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3041   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3042   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3043   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3044   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3045   PetscFunctionReturn(0);
3046 }
3047 
3048 #undef __FUNCT__
3049 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3050 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3051 {
3052   PetscErrorCode ierr;
3053   Mat            *dummy;
3054 
3055   PetscFunctionBegin;
3056   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3057   *newmat = *dummy;
3058   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3059   PetscFunctionReturn(0);
3060 }
3061 
3062 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3063 
3064 #undef __FUNCT__
3065 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3066 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3067 {
3068   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3069   PetscErrorCode ierr;
3070 
3071   PetscFunctionBegin;
3072   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3073   PetscFunctionReturn(0);
3074 }
3075 
3076 #undef __FUNCT__
3077 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3078 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3079 {
3080   PetscErrorCode ierr;
3081   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3082 
3083   PetscFunctionBegin;
3084   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3085   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3086   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3087   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3088   PetscFunctionReturn(0);
3089 }
3090 
3091 /* -------------------------------------------------------------------*/
3092 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3093        MatGetRow_MPIAIJ,
3094        MatRestoreRow_MPIAIJ,
3095        MatMult_MPIAIJ,
3096 /* 4*/ MatMultAdd_MPIAIJ,
3097        MatMultTranspose_MPIAIJ,
3098        MatMultTransposeAdd_MPIAIJ,
3099 #if defined(PETSC_HAVE_PBGL)
3100        MatSolve_MPIAIJ,
3101 #else
3102        0,
3103 #endif
3104        0,
3105        0,
3106 /*10*/ 0,
3107        0,
3108        0,
3109        MatSOR_MPIAIJ,
3110        MatTranspose_MPIAIJ,
3111 /*15*/ MatGetInfo_MPIAIJ,
3112        MatEqual_MPIAIJ,
3113        MatGetDiagonal_MPIAIJ,
3114        MatDiagonalScale_MPIAIJ,
3115        MatNorm_MPIAIJ,
3116 /*20*/ MatAssemblyBegin_MPIAIJ,
3117        MatAssemblyEnd_MPIAIJ,
3118        MatSetOption_MPIAIJ,
3119        MatZeroEntries_MPIAIJ,
3120 /*24*/ MatZeroRows_MPIAIJ,
3121        0,
3122 #if defined(PETSC_HAVE_PBGL)
3123        0,
3124 #else
3125        0,
3126 #endif
3127        0,
3128        0,
3129 /*29*/ MatSetUp_MPIAIJ,
3130 #if defined(PETSC_HAVE_PBGL)
3131        0,
3132 #else
3133        0,
3134 #endif
3135        0,
3136        0,
3137        0,
3138 /*34*/ MatDuplicate_MPIAIJ,
3139        0,
3140        0,
3141        0,
3142        0,
3143 /*39*/ MatAXPY_MPIAIJ,
3144        MatGetSubMatrices_MPIAIJ,
3145        MatIncreaseOverlap_MPIAIJ,
3146        MatGetValues_MPIAIJ,
3147        MatCopy_MPIAIJ,
3148 /*44*/ MatGetRowMax_MPIAIJ,
3149        MatScale_MPIAIJ,
3150        0,
3151        0,
3152        MatZeroRowsColumns_MPIAIJ,
3153 /*49*/ MatSetRandom_MPIAIJ,
3154        0,
3155        0,
3156        0,
3157        0,
3158 /*54*/ MatFDColoringCreate_MPIAIJ,
3159        0,
3160        MatSetUnfactored_MPIAIJ,
3161        MatPermute_MPIAIJ,
3162        0,
3163 /*59*/ MatGetSubMatrix_MPIAIJ,
3164        MatDestroy_MPIAIJ,
3165        MatView_MPIAIJ,
3166        0,
3167        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3168 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3169        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3170        0,
3171        0,
3172        0,
3173 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3174        MatGetRowMinAbs_MPIAIJ,
3175        0,
3176        MatSetColoring_MPIAIJ,
3177        0,
3178        MatSetValuesAdifor_MPIAIJ,
3179 /*75*/ MatFDColoringApply_AIJ,
3180        0,
3181        0,
3182        0,
3183        MatFindZeroDiagonals_MPIAIJ,
3184 /*80*/ 0,
3185        0,
3186        0,
3187 /*83*/ MatLoad_MPIAIJ,
3188        0,
3189        0,
3190        0,
3191        0,
3192        0,
3193 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3194        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3195        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3196        MatPtAP_MPIAIJ_MPIAIJ,
3197        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3198 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3199        0,
3200        0,
3201        0,
3202        0,
3203 /*99*/ 0,
3204        0,
3205        0,
3206        MatConjugate_MPIAIJ,
3207        0,
3208 /*104*/MatSetValuesRow_MPIAIJ,
3209        MatRealPart_MPIAIJ,
3210        MatImaginaryPart_MPIAIJ,
3211        0,
3212        0,
3213 /*109*/0,
3214        MatGetRedundantMatrix_MPIAIJ,
3215        MatGetRowMin_MPIAIJ,
3216        0,
3217        0,
3218 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3219        0,
3220        0,
3221        0,
3222        0,
3223 /*119*/0,
3224        0,
3225        0,
3226        0,
3227        MatGetMultiProcBlock_MPIAIJ,
3228 /*124*/MatFindNonzeroRows_MPIAIJ,
3229        MatGetColumnNorms_MPIAIJ,
3230        MatInvertBlockDiagonal_MPIAIJ,
3231        0,
3232        MatGetSubMatricesParallel_MPIAIJ,
3233 /*129*/0,
3234        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3235        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3236        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3237        0,
3238 /*134*/0,
3239        0,
3240        0,
3241        0,
3242        0
3243 };
3244 
3245 /* ----------------------------------------------------------------------------------------*/
3246 
3247 EXTERN_C_BEGIN
3248 #undef __FUNCT__
3249 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3250 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3251 {
3252   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3253   PetscErrorCode ierr;
3254 
3255   PetscFunctionBegin;
3256   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3257   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3258   PetscFunctionReturn(0);
3259 }
3260 EXTERN_C_END
3261 
3262 EXTERN_C_BEGIN
3263 #undef __FUNCT__
3264 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3265 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3266 {
3267   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3268   PetscErrorCode ierr;
3269 
3270   PetscFunctionBegin;
3271   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3272   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3273   PetscFunctionReturn(0);
3274 }
3275 EXTERN_C_END
3276 
3277 EXTERN_C_BEGIN
3278 #undef __FUNCT__
3279 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3280 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3281 {
3282   Mat_MPIAIJ     *b;
3283   PetscErrorCode ierr;
3284   PetscInt       i;
3285   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3286 
3287   PetscFunctionBegin;
3288   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3289   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3290   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3291   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3292   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3293   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3294 
3295   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3296   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3297   if (d_nnz) {
3298     for (i=0; i<B->rmap->n; i++) {
3299       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3300     }
3301   }
3302   if (o_nnz) {
3303     for (i=0; i<B->rmap->n; i++) {
3304       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3305     }
3306   }
3307   b = (Mat_MPIAIJ*)B->data;
3308 
3309   if (!B->preallocated) {
3310     /* Explicitly create 2 MATSEQAIJ matrices. */
3311     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3312     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3313     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3314     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3315     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3316     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3317     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3318     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3319     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3320     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3321   }
3322 
3323   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3324   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3325   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3326   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3327   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3328   B->preallocated = PETSC_TRUE;
3329   PetscFunctionReturn(0);
3330 }
3331 EXTERN_C_END
3332 
3333 #undef __FUNCT__
3334 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3335 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3336 {
3337   Mat            mat;
3338   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3339   PetscErrorCode ierr;
3340 
3341   PetscFunctionBegin;
3342   *newmat = 0;
3343   ierr    = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3344   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3345   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3346   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3347   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3348   a       = (Mat_MPIAIJ*)mat->data;
3349 
3350   mat->factortype   = matin->factortype;
3351   mat->rmap->bs     = matin->rmap->bs;
3352   mat->cmap->bs     = matin->cmap->bs;
3353   mat->assembled    = PETSC_TRUE;
3354   mat->insertmode   = NOT_SET_VALUES;
3355   mat->preallocated = PETSC_TRUE;
3356 
3357   a->size         = oldmat->size;
3358   a->rank         = oldmat->rank;
3359   a->donotstash   = oldmat->donotstash;
3360   a->roworiented  = oldmat->roworiented;
3361   a->rowindices   = 0;
3362   a->rowvalues    = 0;
3363   a->getrowactive = PETSC_FALSE;
3364 
3365   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3366   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3367 
3368   if (oldmat->colmap) {
3369 #if defined(PETSC_USE_CTABLE)
3370     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3371 #else
3372     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3373     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3374     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3375 #endif
3376   } else a->colmap = 0;
3377   if (oldmat->garray) {
3378     PetscInt len;
3379     len  = oldmat->B->cmap->n;
3380     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3381     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3382     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3383   } else a->garray = 0;
3384 
3385   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3386   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3387   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3388   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3389   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3390   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3391   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3392   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3393   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3394   *newmat = mat;
3395   PetscFunctionReturn(0);
3396 }
3397 
3398 
3399 
3400 #undef __FUNCT__
3401 #define __FUNCT__ "MatLoad_MPIAIJ"
3402 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3403 {
3404   PetscScalar    *vals,*svals;
3405   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3406   PetscErrorCode ierr;
3407   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3408   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3409   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3410   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3411   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3412   int            fd;
3413   PetscInt       bs = 1;
3414 
3415   PetscFunctionBegin;
3416   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3417   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3418   if (!rank) {
3419     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3420     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3421     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3422   }
3423 
3424   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3425   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3426   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3427 
3428   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3429 
3430   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3431   M    = header[1]; N = header[2];
3432   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3433   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3434   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3435 
3436   /* If global sizes are set, check if they are consistent with that given in the file */
3437   if (sizesset) {
3438     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3439   }
3440   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3441   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3442 
3443   /* determine ownership of all (block) rows */
3444   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3445   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3446   else m = newMat->rmap->n; /* Set by user */
3447 
3448   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3449   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3450 
3451   /* First process needs enough room for process with most rows */
3452   if (!rank) {
3453     mmax = rowners[1];
3454     for (i=2; i<=size; i++) {
3455       mmax = PetscMax(mmax, rowners[i]);
3456     }
3457   } else mmax = m;
3458 
3459   rowners[0] = 0;
3460   for (i=2; i<=size; i++) {
3461     rowners[i] += rowners[i-1];
3462   }
3463   rstart = rowners[rank];
3464   rend   = rowners[rank+1];
3465 
3466   /* distribute row lengths to all processors */
3467   ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3468   if (!rank) {
3469     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3470     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3471     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3472     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3473     for (j=0; j<m; j++) {
3474       procsnz[0] += ourlens[j];
3475     }
3476     for (i=1; i<size; i++) {
3477       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3478       /* calculate the number of nonzeros on each processor */
3479       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3480         procsnz[i] += rowlengths[j];
3481       }
3482       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3483     }
3484     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3485   } else {
3486     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3487   }
3488 
3489   if (!rank) {
3490     /* determine max buffer needed and allocate it */
3491     maxnz = 0;
3492     for (i=0; i<size; i++) {
3493       maxnz = PetscMax(maxnz,procsnz[i]);
3494     }
3495     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3496 
3497     /* read in my part of the matrix column indices  */
3498     nz   = procsnz[0];
3499     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3500     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3501 
3502     /* read in every one elses and ship off */
3503     for (i=1; i<size; i++) {
3504       nz   = procsnz[i];
3505       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3506       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3507     }
3508     ierr = PetscFree(cols);CHKERRQ(ierr);
3509   } else {
3510     /* determine buffer space needed for message */
3511     nz = 0;
3512     for (i=0; i<m; i++) {
3513       nz += ourlens[i];
3514     }
3515     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3516 
3517     /* receive message of column indices*/
3518     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3519   }
3520 
3521   /* determine column ownership if matrix is not square */
3522   if (N != M) {
3523     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3524     else n = newMat->cmap->n;
3525     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3526     cstart = cend - n;
3527   } else {
3528     cstart = rstart;
3529     cend   = rend;
3530     n      = cend - cstart;
3531   }
3532 
3533   /* loop over local rows, determining number of off diagonal entries */
3534   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3535   jj   = 0;
3536   for (i=0; i<m; i++) {
3537     for (j=0; j<ourlens[i]; j++) {
3538       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3539       jj++;
3540     }
3541   }
3542 
3543   for (i=0; i<m; i++) {
3544     ourlens[i] -= offlens[i];
3545   }
3546   if (!sizesset) {
3547     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3548   }
3549 
3550   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3551 
3552   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3553 
3554   for (i=0; i<m; i++) {
3555     ourlens[i] += offlens[i];
3556   }
3557 
3558   if (!rank) {
3559     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3560 
3561     /* read in my part of the matrix numerical values  */
3562     nz   = procsnz[0];
3563     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3564 
3565     /* insert into matrix */
3566     jj      = rstart;
3567     smycols = mycols;
3568     svals   = vals;
3569     for (i=0; i<m; i++) {
3570       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3571       smycols += ourlens[i];
3572       svals   += ourlens[i];
3573       jj++;
3574     }
3575 
3576     /* read in other processors and ship out */
3577     for (i=1; i<size; i++) {
3578       nz   = procsnz[i];
3579       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3580       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3581     }
3582     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3583   } else {
3584     /* receive numeric values */
3585     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3586 
3587     /* receive message of values*/
3588     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3589 
3590     /* insert into matrix */
3591     jj      = rstart;
3592     smycols = mycols;
3593     svals   = vals;
3594     for (i=0; i<m; i++) {
3595       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3596       smycols += ourlens[i];
3597       svals   += ourlens[i];
3598       jj++;
3599     }
3600   }
3601   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3602   ierr = PetscFree(vals);CHKERRQ(ierr);
3603   ierr = PetscFree(mycols);CHKERRQ(ierr);
3604   ierr = PetscFree(rowners);CHKERRQ(ierr);
3605   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3606   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3607   PetscFunctionReturn(0);
3608 }
3609 
3610 #undef __FUNCT__
3611 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3612 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3613 {
3614   PetscErrorCode ierr;
3615   IS             iscol_local;
3616   PetscInt       csize;
3617 
3618   PetscFunctionBegin;
3619   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3620   if (call == MAT_REUSE_MATRIX) {
3621     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3622     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3623   } else {
3624     PetscInt cbs;
3625     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3626     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3627     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3628   }
3629   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3630   if (call == MAT_INITIAL_MATRIX) {
3631     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3632     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3633   }
3634   PetscFunctionReturn(0);
3635 }
3636 
3637 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3638 #undef __FUNCT__
3639 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3640 /*
3641     Not great since it makes two copies of the submatrix, first an SeqAIJ
3642   in local and then by concatenating the local matrices the end result.
3643   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3644 
3645   Note: This requires a sequential iscol with all indices.
3646 */
3647 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3648 {
3649   PetscErrorCode ierr;
3650   PetscMPIInt    rank,size;
3651   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3652   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3653   PetscBool      allcolumns, colflag;
3654   Mat            M,Mreuse;
3655   MatScalar      *vwork,*aa;
3656   MPI_Comm       comm = ((PetscObject)mat)->comm;
3657   Mat_SeqAIJ     *aij;
3658 
3659 
3660   PetscFunctionBegin;
3661   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3662   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3663 
3664   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3665   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3666   if (colflag && ncol == mat->cmap->N) {
3667     allcolumns = PETSC_TRUE;
3668   } else {
3669     allcolumns = PETSC_FALSE;
3670   }
3671   if (call ==  MAT_REUSE_MATRIX) {
3672     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3673     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3674     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3675   } else {
3676     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3677   }
3678 
3679   /*
3680       m - number of local rows
3681       n - number of columns (same on all processors)
3682       rstart - first row in new global matrix generated
3683   */
3684   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3685   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3686   if (call == MAT_INITIAL_MATRIX) {
3687     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3688     ii  = aij->i;
3689     jj  = aij->j;
3690 
3691     /*
3692         Determine the number of non-zeros in the diagonal and off-diagonal
3693         portions of the matrix in order to do correct preallocation
3694     */
3695 
3696     /* first get start and end of "diagonal" columns */
3697     if (csize == PETSC_DECIDE) {
3698       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3699       if (mglobal == n) { /* square matrix */
3700         nlocal = m;
3701       } else {
3702         nlocal = n/size + ((n % size) > rank);
3703       }
3704     } else {
3705       nlocal = csize;
3706     }
3707     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3708     rstart = rend - nlocal;
3709     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3710 
3711     /* next, compute all the lengths */
3712     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3713     olens = dlens + m;
3714     for (i=0; i<m; i++) {
3715       jend = ii[i+1] - ii[i];
3716       olen = 0;
3717       dlen = 0;
3718       for (j=0; j<jend; j++) {
3719         if (*jj < rstart || *jj >= rend) olen++;
3720         else dlen++;
3721         jj++;
3722       }
3723       olens[i] = olen;
3724       dlens[i] = dlen;
3725     }
3726     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3727     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3728     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3729     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3730     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3731     ierr = PetscFree(dlens);CHKERRQ(ierr);
3732   } else {
3733     PetscInt ml,nl;
3734 
3735     M    = *newmat;
3736     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3737     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3738     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3739     /*
3740          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3741        rather than the slower MatSetValues().
3742     */
3743     M->was_assembled = PETSC_TRUE;
3744     M->assembled     = PETSC_FALSE;
3745   }
3746   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3747   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3748   ii   = aij->i;
3749   jj   = aij->j;
3750   aa   = aij->a;
3751   for (i=0; i<m; i++) {
3752     row   = rstart + i;
3753     nz    = ii[i+1] - ii[i];
3754     cwork = jj;     jj += nz;
3755     vwork = aa;     aa += nz;
3756     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3757   }
3758 
3759   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3760   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3761   *newmat = M;
3762 
3763   /* save submatrix used in processor for next request */
3764   if (call ==  MAT_INITIAL_MATRIX) {
3765     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3766     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3767   }
3768   PetscFunctionReturn(0);
3769 }
3770 
3771 EXTERN_C_BEGIN
3772 #undef __FUNCT__
3773 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3774 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3775 {
3776   PetscInt       m,cstart, cend,j,nnz,i,d;
3777   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3778   const PetscInt *JJ;
3779   PetscScalar    *values;
3780   PetscErrorCode ierr;
3781 
3782   PetscFunctionBegin;
3783   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3784 
3785   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3786   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3787   m      = B->rmap->n;
3788   cstart = B->cmap->rstart;
3789   cend   = B->cmap->rend;
3790   rstart = B->rmap->rstart;
3791 
3792   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3793 
3794 #if defined(PETSC_USE_DEBUGGING)
3795   for (i=0; i<m; i++) {
3796     nnz = Ii[i+1]- Ii[i];
3797     JJ  = J + Ii[i];
3798     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3799     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3800     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3801   }
3802 #endif
3803 
3804   for (i=0; i<m; i++) {
3805     nnz     = Ii[i+1]- Ii[i];
3806     JJ      = J + Ii[i];
3807     nnz_max = PetscMax(nnz_max,nnz);
3808     d       = 0;
3809     for (j=0; j<nnz; j++) {
3810       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3811     }
3812     d_nnz[i] = d;
3813     o_nnz[i] = nnz - d;
3814   }
3815   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3816   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3817 
3818   if (v) values = (PetscScalar*)v;
3819   else {
3820     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3821     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3822   }
3823 
3824   for (i=0; i<m; i++) {
3825     ii   = i + rstart;
3826     nnz  = Ii[i+1]- Ii[i];
3827     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3828   }
3829   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3830   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3831 
3832   if (!v) {
3833     ierr = PetscFree(values);CHKERRQ(ierr);
3834   }
3835   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3836   PetscFunctionReturn(0);
3837 }
3838 EXTERN_C_END
3839 
3840 #undef __FUNCT__
3841 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3842 /*@
3843    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3844    (the default parallel PETSc format).
3845 
3846    Collective on MPI_Comm
3847 
3848    Input Parameters:
3849 +  B - the matrix
3850 .  i - the indices into j for the start of each local row (starts with zero)
3851 .  j - the column indices for each local row (starts with zero)
3852 -  v - optional values in the matrix
3853 
3854    Level: developer
3855 
3856    Notes:
3857        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3858      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3859      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3860 
3861        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3862 
3863        The format which is used for the sparse matrix input, is equivalent to a
3864     row-major ordering.. i.e for the following matrix, the input data expected is
3865     as shown:
3866 
3867         1 0 0
3868         2 0 3     P0
3869        -------
3870         4 5 6     P1
3871 
3872      Process0 [P0]: rows_owned=[0,1]
3873         i =  {0,1,3}  [size = nrow+1  = 2+1]
3874         j =  {0,0,2}  [size = nz = 6]
3875         v =  {1,2,3}  [size = nz = 6]
3876 
3877      Process1 [P1]: rows_owned=[2]
3878         i =  {0,3}    [size = nrow+1  = 1+1]
3879         j =  {0,1,2}  [size = nz = 6]
3880         v =  {4,5,6}  [size = nz = 6]
3881 
3882 .keywords: matrix, aij, compressed row, sparse, parallel
3883 
3884 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3885           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3886 @*/
3887 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3888 {
3889   PetscErrorCode ierr;
3890 
3891   PetscFunctionBegin;
3892   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3893   PetscFunctionReturn(0);
3894 }
3895 
3896 #undef __FUNCT__
3897 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3898 /*@C
3899    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3900    (the default parallel PETSc format).  For good matrix assembly performance
3901    the user should preallocate the matrix storage by setting the parameters
3902    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3903    performance can be increased by more than a factor of 50.
3904 
3905    Collective on MPI_Comm
3906 
3907    Input Parameters:
3908 +  A - the matrix
3909 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3910            (same value is used for all local rows)
3911 .  d_nnz - array containing the number of nonzeros in the various rows of the
3912            DIAGONAL portion of the local submatrix (possibly different for each row)
3913            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3914            The size of this array is equal to the number of local rows, i.e 'm'.
3915            For matrices that will be factored, you must leave room for (and set)
3916            the diagonal entry even if it is zero.
3917 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3918            submatrix (same value is used for all local rows).
3919 -  o_nnz - array containing the number of nonzeros in the various rows of the
3920            OFF-DIAGONAL portion of the local submatrix (possibly different for
3921            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3922            structure. The size of this array is equal to the number
3923            of local rows, i.e 'm'.
3924 
3925    If the *_nnz parameter is given then the *_nz parameter is ignored
3926 
3927    The AIJ format (also called the Yale sparse matrix format or
3928    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3929    storage.  The stored row and column indices begin with zero.
3930    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3931 
3932    The parallel matrix is partitioned such that the first m0 rows belong to
3933    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3934    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3935 
3936    The DIAGONAL portion of the local submatrix of a processor can be defined
3937    as the submatrix which is obtained by extraction the part corresponding to
3938    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3939    first row that belongs to the processor, r2 is the last row belonging to
3940    the this processor, and c1-c2 is range of indices of the local part of a
3941    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3942    common case of a square matrix, the row and column ranges are the same and
3943    the DIAGONAL part is also square. The remaining portion of the local
3944    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3945 
3946    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3947 
3948    You can call MatGetInfo() to get information on how effective the preallocation was;
3949    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3950    You can also run with the option -info and look for messages with the string
3951    malloc in them to see if additional memory allocation was needed.
3952 
3953    Example usage:
3954 
3955    Consider the following 8x8 matrix with 34 non-zero values, that is
3956    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3957    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3958    as follows:
3959 
3960 .vb
3961             1  2  0  |  0  3  0  |  0  4
3962     Proc0   0  5  6  |  7  0  0  |  8  0
3963             9  0 10  | 11  0  0  | 12  0
3964     -------------------------------------
3965            13  0 14  | 15 16 17  |  0  0
3966     Proc1   0 18  0  | 19 20 21  |  0  0
3967             0  0  0  | 22 23  0  | 24  0
3968     -------------------------------------
3969     Proc2  25 26 27  |  0  0 28  | 29  0
3970            30  0  0  | 31 32 33  |  0 34
3971 .ve
3972 
3973    This can be represented as a collection of submatrices as:
3974 
3975 .vb
3976       A B C
3977       D E F
3978       G H I
3979 .ve
3980 
3981    Where the submatrices A,B,C are owned by proc0, D,E,F are
3982    owned by proc1, G,H,I are owned by proc2.
3983 
3984    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3985    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3986    The 'M','N' parameters are 8,8, and have the same values on all procs.
3987 
3988    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3989    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3990    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3991    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3992    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3993    matrix, ans [DF] as another SeqAIJ matrix.
3994 
3995    When d_nz, o_nz parameters are specified, d_nz storage elements are
3996    allocated for every row of the local diagonal submatrix, and o_nz
3997    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3998    One way to choose d_nz and o_nz is to use the max nonzerors per local
3999    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4000    In this case, the values of d_nz,o_nz are:
4001 .vb
4002      proc0 : dnz = 2, o_nz = 2
4003      proc1 : dnz = 3, o_nz = 2
4004      proc2 : dnz = 1, o_nz = 4
4005 .ve
4006    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4007    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4008    for proc3. i.e we are using 12+15+10=37 storage locations to store
4009    34 values.
4010 
4011    When d_nnz, o_nnz parameters are specified, the storage is specified
4012    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4013    In the above case the values for d_nnz,o_nnz are:
4014 .vb
4015      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4016      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4017      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4018 .ve
4019    Here the space allocated is sum of all the above values i.e 34, and
4020    hence pre-allocation is perfect.
4021 
4022    Level: intermediate
4023 
4024 .keywords: matrix, aij, compressed row, sparse, parallel
4025 
4026 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4027           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4028 @*/
4029 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4030 {
4031   PetscErrorCode ierr;
4032 
4033   PetscFunctionBegin;
4034   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4035   PetscValidType(B,1);
4036   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4037   PetscFunctionReturn(0);
4038 }
4039 
4040 #undef __FUNCT__
4041 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4042 /*@
4043      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4044          CSR format the local rows.
4045 
4046    Collective on MPI_Comm
4047 
4048    Input Parameters:
4049 +  comm - MPI communicator
4050 .  m - number of local rows (Cannot be PETSC_DECIDE)
4051 .  n - This value should be the same as the local size used in creating the
4052        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4053        calculated if N is given) For square matrices n is almost always m.
4054 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4055 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4056 .   i - row indices
4057 .   j - column indices
4058 -   a - matrix values
4059 
4060    Output Parameter:
4061 .   mat - the matrix
4062 
4063    Level: intermediate
4064 
4065    Notes:
4066        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4067      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4068      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4069 
4070        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4071 
4072        The format which is used for the sparse matrix input, is equivalent to a
4073     row-major ordering.. i.e for the following matrix, the input data expected is
4074     as shown:
4075 
4076         1 0 0
4077         2 0 3     P0
4078        -------
4079         4 5 6     P1
4080 
4081      Process0 [P0]: rows_owned=[0,1]
4082         i =  {0,1,3}  [size = nrow+1  = 2+1]
4083         j =  {0,0,2}  [size = nz = 6]
4084         v =  {1,2,3}  [size = nz = 6]
4085 
4086      Process1 [P1]: rows_owned=[2]
4087         i =  {0,3}    [size = nrow+1  = 1+1]
4088         j =  {0,1,2}  [size = nz = 6]
4089         v =  {4,5,6}  [size = nz = 6]
4090 
4091 .keywords: matrix, aij, compressed row, sparse, parallel
4092 
4093 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4094           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4095 @*/
4096 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4097 {
4098   PetscErrorCode ierr;
4099 
4100   PetscFunctionBegin;
4101   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4102   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4103   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4104   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4105   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4106   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4107   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4108   PetscFunctionReturn(0);
4109 }
4110 
4111 #undef __FUNCT__
4112 #define __FUNCT__ "MatCreateAIJ"
4113 /*@C
4114    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4115    (the default parallel PETSc format).  For good matrix assembly performance
4116    the user should preallocate the matrix storage by setting the parameters
4117    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4118    performance can be increased by more than a factor of 50.
4119 
4120    Collective on MPI_Comm
4121 
4122    Input Parameters:
4123 +  comm - MPI communicator
4124 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4125            This value should be the same as the local size used in creating the
4126            y vector for the matrix-vector product y = Ax.
4127 .  n - This value should be the same as the local size used in creating the
4128        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4129        calculated if N is given) For square matrices n is almost always m.
4130 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4131 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4132 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4133            (same value is used for all local rows)
4134 .  d_nnz - array containing the number of nonzeros in the various rows of the
4135            DIAGONAL portion of the local submatrix (possibly different for each row)
4136            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4137            The size of this array is equal to the number of local rows, i.e 'm'.
4138 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4139            submatrix (same value is used for all local rows).
4140 -  o_nnz - array containing the number of nonzeros in the various rows of the
4141            OFF-DIAGONAL portion of the local submatrix (possibly different for
4142            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4143            structure. The size of this array is equal to the number
4144            of local rows, i.e 'm'.
4145 
4146    Output Parameter:
4147 .  A - the matrix
4148 
4149    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4150    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4151    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4152 
4153    Notes:
4154    If the *_nnz parameter is given then the *_nz parameter is ignored
4155 
4156    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4157    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4158    storage requirements for this matrix.
4159 
4160    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4161    processor than it must be used on all processors that share the object for
4162    that argument.
4163 
4164    The user MUST specify either the local or global matrix dimensions
4165    (possibly both).
4166 
4167    The parallel matrix is partitioned across processors such that the
4168    first m0 rows belong to process 0, the next m1 rows belong to
4169    process 1, the next m2 rows belong to process 2 etc.. where
4170    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4171    values corresponding to [m x N] submatrix.
4172 
4173    The columns are logically partitioned with the n0 columns belonging
4174    to 0th partition, the next n1 columns belonging to the next
4175    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4176 
4177    The DIAGONAL portion of the local submatrix on any given processor
4178    is the submatrix corresponding to the rows and columns m,n
4179    corresponding to the given processor. i.e diagonal matrix on
4180    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4181    etc. The remaining portion of the local submatrix [m x (N-n)]
4182    constitute the OFF-DIAGONAL portion. The example below better
4183    illustrates this concept.
4184 
4185    For a square global matrix we define each processor's diagonal portion
4186    to be its local rows and the corresponding columns (a square submatrix);
4187    each processor's off-diagonal portion encompasses the remainder of the
4188    local matrix (a rectangular submatrix).
4189 
4190    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4191 
4192    When calling this routine with a single process communicator, a matrix of
4193    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4194    type of communicator, use the construction mechanism:
4195      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4196 
4197    By default, this format uses inodes (identical nodes) when possible.
4198    We search for consecutive rows with the same nonzero structure, thereby
4199    reusing matrix information to achieve increased efficiency.
4200 
4201    Options Database Keys:
4202 +  -mat_no_inode  - Do not use inodes
4203 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4204 -  -mat_aij_oneindex - Internally use indexing starting at 1
4205         rather than 0.  Note that when calling MatSetValues(),
4206         the user still MUST index entries starting at 0!
4207 
4208 
4209    Example usage:
4210 
4211    Consider the following 8x8 matrix with 34 non-zero values, that is
4212    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4213    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4214    as follows:
4215 
4216 .vb
4217             1  2  0  |  0  3  0  |  0  4
4218     Proc0   0  5  6  |  7  0  0  |  8  0
4219             9  0 10  | 11  0  0  | 12  0
4220     -------------------------------------
4221            13  0 14  | 15 16 17  |  0  0
4222     Proc1   0 18  0  | 19 20 21  |  0  0
4223             0  0  0  | 22 23  0  | 24  0
4224     -------------------------------------
4225     Proc2  25 26 27  |  0  0 28  | 29  0
4226            30  0  0  | 31 32 33  |  0 34
4227 .ve
4228 
4229    This can be represented as a collection of submatrices as:
4230 
4231 .vb
4232       A B C
4233       D E F
4234       G H I
4235 .ve
4236 
4237    Where the submatrices A,B,C are owned by proc0, D,E,F are
4238    owned by proc1, G,H,I are owned by proc2.
4239 
4240    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4241    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4242    The 'M','N' parameters are 8,8, and have the same values on all procs.
4243 
4244    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4245    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4246    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4247    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4248    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4249    matrix, ans [DF] as another SeqAIJ matrix.
4250 
4251    When d_nz, o_nz parameters are specified, d_nz storage elements are
4252    allocated for every row of the local diagonal submatrix, and o_nz
4253    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4254    One way to choose d_nz and o_nz is to use the max nonzerors per local
4255    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4256    In this case, the values of d_nz,o_nz are:
4257 .vb
4258      proc0 : dnz = 2, o_nz = 2
4259      proc1 : dnz = 3, o_nz = 2
4260      proc2 : dnz = 1, o_nz = 4
4261 .ve
4262    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4263    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4264    for proc3. i.e we are using 12+15+10=37 storage locations to store
4265    34 values.
4266 
4267    When d_nnz, o_nnz parameters are specified, the storage is specified
4268    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4269    In the above case the values for d_nnz,o_nnz are:
4270 .vb
4271      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4272      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4273      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4274 .ve
4275    Here the space allocated is sum of all the above values i.e 34, and
4276    hence pre-allocation is perfect.
4277 
4278    Level: intermediate
4279 
4280 .keywords: matrix, aij, compressed row, sparse, parallel
4281 
4282 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4283           MPIAIJ, MatCreateMPIAIJWithArrays()
4284 @*/
4285 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4286 {
4287   PetscErrorCode ierr;
4288   PetscMPIInt    size;
4289 
4290   PetscFunctionBegin;
4291   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4292   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4293   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4294   if (size > 1) {
4295     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4296     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4297   } else {
4298     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4299     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4300   }
4301   PetscFunctionReturn(0);
4302 }
4303 
4304 #undef __FUNCT__
4305 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4306 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4307 {
4308   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4309 
4310   PetscFunctionBegin;
4311   *Ad     = a->A;
4312   *Ao     = a->B;
4313   *colmap = a->garray;
4314   PetscFunctionReturn(0);
4315 }
4316 
4317 #undef __FUNCT__
4318 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4319 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4320 {
4321   PetscErrorCode ierr;
4322   PetscInt       i;
4323   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4324 
4325   PetscFunctionBegin;
4326   if (coloring->ctype == IS_COLORING_GLOBAL) {
4327     ISColoringValue *allcolors,*colors;
4328     ISColoring      ocoloring;
4329 
4330     /* set coloring for diagonal portion */
4331     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4332 
4333     /* set coloring for off-diagonal portion */
4334     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4335     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4336     for (i=0; i<a->B->cmap->n; i++) {
4337       colors[i] = allcolors[a->garray[i]];
4338     }
4339     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4340     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4341     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4342     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4343   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4344     ISColoringValue *colors;
4345     PetscInt        *larray;
4346     ISColoring      ocoloring;
4347 
4348     /* set coloring for diagonal portion */
4349     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4350     for (i=0; i<a->A->cmap->n; i++) {
4351       larray[i] = i + A->cmap->rstart;
4352     }
4353     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4354     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4355     for (i=0; i<a->A->cmap->n; i++) {
4356       colors[i] = coloring->colors[larray[i]];
4357     }
4358     ierr = PetscFree(larray);CHKERRQ(ierr);
4359     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4360     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4361     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4362 
4363     /* set coloring for off-diagonal portion */
4364     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4365     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4366     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4367     for (i=0; i<a->B->cmap->n; i++) {
4368       colors[i] = coloring->colors[larray[i]];
4369     }
4370     ierr = PetscFree(larray);CHKERRQ(ierr);
4371     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4372     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4373     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4374   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4375   PetscFunctionReturn(0);
4376 }
4377 
4378 #undef __FUNCT__
4379 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4380 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4381 {
4382   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4383   PetscErrorCode ierr;
4384 
4385   PetscFunctionBegin;
4386   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4387   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4388   PetscFunctionReturn(0);
4389 }
4390 
4391 #undef __FUNCT__
4392 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4393 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4394 {
4395   PetscErrorCode ierr;
4396   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4397   PetscInt       *indx;
4398 
4399   PetscFunctionBegin;
4400   /* This routine will ONLY return MPIAIJ type matrix */
4401   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4402   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4403   if (n == PETSC_DECIDE) {
4404     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4405   }
4406   /* Check sum(n) = N */
4407   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4408   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4409 
4410   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4411   rstart -= m;
4412 
4413   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4414   for (i=0; i<m; i++) {
4415     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4416     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4417     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4418   }
4419 
4420   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4421   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4422   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4423   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4424   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4425   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4426   PetscFunctionReturn(0);
4427 }
4428 
4429 #undef __FUNCT__
4430 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4431 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4432 {
4433   PetscErrorCode ierr;
4434   PetscInt       m,N,i,rstart,nnz,Ii;
4435   PetscInt       *indx;
4436   PetscScalar    *values;
4437 
4438   PetscFunctionBegin;
4439   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4440   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4441   for (i=0; i<m; i++) {
4442     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4443     Ii   = i + rstart;
4444     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4445     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4446   }
4447   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4448   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4449   PetscFunctionReturn(0);
4450 }
4451 
4452 #undef __FUNCT__
4453 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4454 /*@
4455       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4456                  matrices from each processor
4457 
4458     Collective on MPI_Comm
4459 
4460    Input Parameters:
4461 +    comm - the communicators the parallel matrix will live on
4462 .    inmat - the input sequential matrices
4463 .    n - number of local columns (or PETSC_DECIDE)
4464 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4465 
4466    Output Parameter:
4467 .    outmat - the parallel matrix generated
4468 
4469     Level: advanced
4470 
4471    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4472 
4473 @*/
4474 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4475 {
4476   PetscErrorCode ierr;
4477 
4478   PetscFunctionBegin;
4479   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4480   if (scall == MAT_INITIAL_MATRIX) {
4481     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4482   }
4483   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4484   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4485   PetscFunctionReturn(0);
4486 }
4487 
4488 #undef __FUNCT__
4489 #define __FUNCT__ "MatFileSplit"
4490 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4491 {
4492   PetscErrorCode    ierr;
4493   PetscMPIInt       rank;
4494   PetscInt          m,N,i,rstart,nnz;
4495   size_t            len;
4496   const PetscInt    *indx;
4497   PetscViewer       out;
4498   char              *name;
4499   Mat               B;
4500   const PetscScalar *values;
4501 
4502   PetscFunctionBegin;
4503   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4504   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4505   /* Should this be the type of the diagonal block of A? */
4506   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4507   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4508   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4509   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4510   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4511   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4512   for (i=0; i<m; i++) {
4513     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4514     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4515     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4516   }
4517   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4518   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4519 
4520   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4521   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4522   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4523   sprintf(name,"%s.%d",outfile,rank);
4524   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4525   ierr = PetscFree(name);CHKERRQ(ierr);
4526   ierr = MatView(B,out);CHKERRQ(ierr);
4527   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4528   ierr = MatDestroy(&B);CHKERRQ(ierr);
4529   PetscFunctionReturn(0);
4530 }
4531 
4532 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4533 #undef __FUNCT__
4534 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4535 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4536 {
4537   PetscErrorCode      ierr;
4538   Mat_Merge_SeqsToMPI *merge;
4539   PetscContainer      container;
4540 
4541   PetscFunctionBegin;
4542   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4543   if (container) {
4544     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4545     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4546     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4547     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4548     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4549     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4550     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4551     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4552     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4553     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4554     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4555     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4556     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4557     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4558     ierr = PetscFree(merge);CHKERRQ(ierr);
4559     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4560   }
4561   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4562   PetscFunctionReturn(0);
4563 }
4564 
4565 #include <../src/mat/utils/freespace.h>
4566 #include <petscbt.h>
4567 
4568 #undef __FUNCT__
4569 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4570 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4571 {
4572   PetscErrorCode      ierr;
4573   MPI_Comm            comm=((PetscObject)mpimat)->comm;
4574   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4575   PetscMPIInt         size,rank,taga,*len_s;
4576   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4577   PetscInt            proc,m;
4578   PetscInt            **buf_ri,**buf_rj;
4579   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4580   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4581   MPI_Request         *s_waits,*r_waits;
4582   MPI_Status          *status;
4583   MatScalar           *aa=a->a;
4584   MatScalar           **abuf_r,*ba_i;
4585   Mat_Merge_SeqsToMPI *merge;
4586   PetscContainer      container;
4587 
4588   PetscFunctionBegin;
4589   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4590 
4591   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4592   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4593 
4594   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4595   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4596 
4597   bi     = merge->bi;
4598   bj     = merge->bj;
4599   buf_ri = merge->buf_ri;
4600   buf_rj = merge->buf_rj;
4601 
4602   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4603   owners = merge->rowmap->range;
4604   len_s  = merge->len_s;
4605 
4606   /* send and recv matrix values */
4607   /*-----------------------------*/
4608   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4609   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4610 
4611   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4612   for (proc=0,k=0; proc<size; proc++) {
4613     if (!len_s[proc]) continue;
4614     i    = owners[proc];
4615     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4616     k++;
4617   }
4618 
4619   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4620   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4621   ierr = PetscFree(status);CHKERRQ(ierr);
4622 
4623   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4624   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4625 
4626   /* insert mat values of mpimat */
4627   /*----------------------------*/
4628   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4629   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4630 
4631   for (k=0; k<merge->nrecv; k++) {
4632     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4633     nrows       = *(buf_ri_k[k]);
4634     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4635     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4636   }
4637 
4638   /* set values of ba */
4639   m = merge->rowmap->n;
4640   for (i=0; i<m; i++) {
4641     arow = owners[rank] + i;
4642     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4643     bnzi = bi[i+1] - bi[i];
4644     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4645 
4646     /* add local non-zero vals of this proc's seqmat into ba */
4647     anzi   = ai[arow+1] - ai[arow];
4648     aj     = a->j + ai[arow];
4649     aa     = a->a + ai[arow];
4650     nextaj = 0;
4651     for (j=0; nextaj<anzi; j++) {
4652       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4653         ba_i[j] += aa[nextaj++];
4654       }
4655     }
4656 
4657     /* add received vals into ba */
4658     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4659       /* i-th row */
4660       if (i == *nextrow[k]) {
4661         anzi   = *(nextai[k]+1) - *nextai[k];
4662         aj     = buf_rj[k] + *(nextai[k]);
4663         aa     = abuf_r[k] + *(nextai[k]);
4664         nextaj = 0;
4665         for (j=0; nextaj<anzi; j++) {
4666           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4667             ba_i[j] += aa[nextaj++];
4668           }
4669         }
4670         nextrow[k]++; nextai[k]++;
4671       }
4672     }
4673     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4674   }
4675   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4676   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4677 
4678   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4679   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4680   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4681   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4682   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4683   PetscFunctionReturn(0);
4684 }
4685 
4686 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4687 
4688 #undef __FUNCT__
4689 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4690 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4691 {
4692   PetscErrorCode      ierr;
4693   Mat                 B_mpi;
4694   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4695   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4696   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4697   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4698   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4699   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4700   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4701   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4702   MPI_Status          *status;
4703   PetscFreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
4704   PetscBT             lnkbt;
4705   Mat_Merge_SeqsToMPI *merge;
4706   PetscContainer      container;
4707 
4708   PetscFunctionBegin;
4709   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4710 
4711   /* make sure it is a PETSc comm */
4712   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4713   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4714   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4715 
4716   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4717   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4718 
4719   /* determine row ownership */
4720   /*---------------------------------------------------------*/
4721   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4722   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4723   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4724   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4725   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4726   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4727   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4728 
4729   m      = merge->rowmap->n;
4730   owners = merge->rowmap->range;
4731 
4732   /* determine the number of messages to send, their lengths */
4733   /*---------------------------------------------------------*/
4734   len_s = merge->len_s;
4735 
4736   len          = 0; /* length of buf_si[] */
4737   merge->nsend = 0;
4738   for (proc=0; proc<size; proc++) {
4739     len_si[proc] = 0;
4740     if (proc == rank) {
4741       len_s[proc] = 0;
4742     } else {
4743       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4744       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4745     }
4746     if (len_s[proc]) {
4747       merge->nsend++;
4748       nrows = 0;
4749       for (i=owners[proc]; i<owners[proc+1]; i++) {
4750         if (ai[i+1] > ai[i]) nrows++;
4751       }
4752       len_si[proc] = 2*(nrows+1);
4753       len += len_si[proc];
4754     }
4755   }
4756 
4757   /* determine the number and length of messages to receive for ij-structure */
4758   /*-------------------------------------------------------------------------*/
4759   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4760   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4761 
4762   /* post the Irecv of j-structure */
4763   /*-------------------------------*/
4764   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4765   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4766 
4767   /* post the Isend of j-structure */
4768   /*--------------------------------*/
4769   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4770 
4771   for (proc=0, k=0; proc<size; proc++) {
4772     if (!len_s[proc]) continue;
4773     i    = owners[proc];
4774     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4775     k++;
4776   }
4777 
4778   /* receives and sends of j-structure are complete */
4779   /*------------------------------------------------*/
4780   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4781   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4782 
4783   /* send and recv i-structure */
4784   /*---------------------------*/
4785   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4786   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4787 
4788   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4789   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4790   for (proc=0,k=0; proc<size; proc++) {
4791     if (!len_s[proc]) continue;
4792     /* form outgoing message for i-structure:
4793          buf_si[0]:                 nrows to be sent
4794                [1:nrows]:           row index (global)
4795                [nrows+1:2*nrows+1]: i-structure index
4796     */
4797     /*-------------------------------------------*/
4798     nrows       = len_si[proc]/2 - 1;
4799     buf_si_i    = buf_si + nrows+1;
4800     buf_si[0]   = nrows;
4801     buf_si_i[0] = 0;
4802     nrows       = 0;
4803     for (i=owners[proc]; i<owners[proc+1]; i++) {
4804       anzi = ai[i+1] - ai[i];
4805       if (anzi) {
4806         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4807         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4808         nrows++;
4809       }
4810     }
4811     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4812     k++;
4813     buf_si += len_si[proc];
4814   }
4815 
4816   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4817   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4818 
4819   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4820   for (i=0; i<merge->nrecv; i++) {
4821     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4822   }
4823 
4824   ierr = PetscFree(len_si);CHKERRQ(ierr);
4825   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4826   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4827   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4828   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4829   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4830   ierr = PetscFree(status);CHKERRQ(ierr);
4831 
4832   /* compute a local seq matrix in each processor */
4833   /*----------------------------------------------*/
4834   /* allocate bi array and free space for accumulating nonzero column info */
4835   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4836   bi[0] = 0;
4837 
4838   /* create and initialize a linked list */
4839   nlnk = N+1;
4840   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4841 
4842   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4843   len  = ai[owners[rank+1]] - ai[owners[rank]];
4844   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4845 
4846   current_space = free_space;
4847 
4848   /* determine symbolic info for each local row */
4849   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4850 
4851   for (k=0; k<merge->nrecv; k++) {
4852     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4853     nrows       = *buf_ri_k[k];
4854     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4855     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4856   }
4857 
4858   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4859   len  = 0;
4860   for (i=0; i<m; i++) {
4861     bnzi = 0;
4862     /* add local non-zero cols of this proc's seqmat into lnk */
4863     arow = owners[rank] + i;
4864     anzi = ai[arow+1] - ai[arow];
4865     aj   = a->j + ai[arow];
4866     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4867     bnzi += nlnk;
4868     /* add received col data into lnk */
4869     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4870       if (i == *nextrow[k]) { /* i-th row */
4871         anzi  = *(nextai[k]+1) - *nextai[k];
4872         aj    = buf_rj[k] + *nextai[k];
4873         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4874         bnzi += nlnk;
4875         nextrow[k]++; nextai[k]++;
4876       }
4877     }
4878     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4879 
4880     /* if free space is not available, make more free space */
4881     if (current_space->local_remaining<bnzi) {
4882       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4883       nspacedouble++;
4884     }
4885     /* copy data into free space, then initialize lnk */
4886     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4887     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4888 
4889     current_space->array           += bnzi;
4890     current_space->local_used      += bnzi;
4891     current_space->local_remaining -= bnzi;
4892 
4893     bi[i+1] = bi[i] + bnzi;
4894   }
4895 
4896   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4897 
4898   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4899   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4900   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4901 
4902   /* create symbolic parallel matrix B_mpi */
4903   /*---------------------------------------*/
4904   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4905   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4906   if (n==PETSC_DECIDE) {
4907     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4908   } else {
4909     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4910   }
4911   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4912   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4913   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4914   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4915   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4916 
4917   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4918   B_mpi->assembled    = PETSC_FALSE;
4919   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4920   merge->bi           = bi;
4921   merge->bj           = bj;
4922   merge->buf_ri       = buf_ri;
4923   merge->buf_rj       = buf_rj;
4924   merge->coi          = PETSC_NULL;
4925   merge->coj          = PETSC_NULL;
4926   merge->owners_co    = PETSC_NULL;
4927 
4928   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4929 
4930   /* attach the supporting struct to B_mpi for reuse */
4931   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4932   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4933   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4934   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4935   *mpimat = B_mpi;
4936 
4937   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4938   PetscFunctionReturn(0);
4939 }
4940 
4941 #undef __FUNCT__
4942 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4943 /*@C
4944       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4945                  matrices from each processor
4946 
4947     Collective on MPI_Comm
4948 
4949    Input Parameters:
4950 +    comm - the communicators the parallel matrix will live on
4951 .    seqmat - the input sequential matrices
4952 .    m - number of local rows (or PETSC_DECIDE)
4953 .    n - number of local columns (or PETSC_DECIDE)
4954 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4955 
4956    Output Parameter:
4957 .    mpimat - the parallel matrix generated
4958 
4959     Level: advanced
4960 
4961    Notes:
4962      The dimensions of the sequential matrix in each processor MUST be the same.
4963      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4964      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4965 @*/
4966 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4967 {
4968   PetscErrorCode ierr;
4969   PetscMPIInt    size;
4970 
4971   PetscFunctionBegin;
4972   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4973   if (size == 1) {
4974     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4975     if (scall == MAT_INITIAL_MATRIX) {
4976       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4977     } else {
4978       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4979     }
4980     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4981     PetscFunctionReturn(0);
4982   }
4983   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4984   if (scall == MAT_INITIAL_MATRIX) {
4985     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4986   }
4987   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4988   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4989   PetscFunctionReturn(0);
4990 }
4991 
4992 #undef __FUNCT__
4993 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4994 /*@
4995      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4996           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4997           with MatGetSize()
4998 
4999     Not Collective
5000 
5001    Input Parameters:
5002 +    A - the matrix
5003 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5004 
5005    Output Parameter:
5006 .    A_loc - the local sequential matrix generated
5007 
5008     Level: developer
5009 
5010 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
5011 
5012 @*/
5013 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5014 {
5015   PetscErrorCode ierr;
5016   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5017   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5018   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5019   MatScalar      *aa=a->a,*ba=b->a,*cam;
5020   PetscScalar    *ca;
5021   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5022   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5023   PetscBool      match;
5024 
5025   PetscFunctionBegin;
5026   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5027   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5028   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5029   if (scall == MAT_INITIAL_MATRIX) {
5030     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5031     ci[0] = 0;
5032     for (i=0; i<am; i++) {
5033       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5034     }
5035     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5036     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5037     k    = 0;
5038     for (i=0; i<am; i++) {
5039       ncols_o = bi[i+1] - bi[i];
5040       ncols_d = ai[i+1] - ai[i];
5041       /* off-diagonal portion of A */
5042       for (jo=0; jo<ncols_o; jo++) {
5043         col = cmap[*bj];
5044         if (col >= cstart) break;
5045         cj[k]   = col; bj++;
5046         ca[k++] = *ba++;
5047       }
5048       /* diagonal portion of A */
5049       for (j=0; j<ncols_d; j++) {
5050         cj[k]   = cstart + *aj++;
5051         ca[k++] = *aa++;
5052       }
5053       /* off-diagonal portion of A */
5054       for (j=jo; j<ncols_o; j++) {
5055         cj[k]   = cmap[*bj++];
5056         ca[k++] = *ba++;
5057       }
5058     }
5059     /* put together the new matrix */
5060     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5061     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5062     /* Since these are PETSc arrays, change flags to free them as necessary. */
5063     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5064     mat->free_a  = PETSC_TRUE;
5065     mat->free_ij = PETSC_TRUE;
5066     mat->nonew   = 0;
5067   } else if (scall == MAT_REUSE_MATRIX) {
5068     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5069     ci = mat->i; cj = mat->j; cam = mat->a;
5070     for (i=0; i<am; i++) {
5071       /* off-diagonal portion of A */
5072       ncols_o = bi[i+1] - bi[i];
5073       for (jo=0; jo<ncols_o; jo++) {
5074         col = cmap[*bj];
5075         if (col >= cstart) break;
5076         *cam++ = *ba++; bj++;
5077       }
5078       /* diagonal portion of A */
5079       ncols_d = ai[i+1] - ai[i];
5080       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5081       /* off-diagonal portion of A */
5082       for (j=jo; j<ncols_o; j++) {
5083         *cam++ = *ba++; bj++;
5084       }
5085     }
5086   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5087   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5088   PetscFunctionReturn(0);
5089 }
5090 
5091 #undef __FUNCT__
5092 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5093 /*@C
5094      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5095 
5096     Not Collective
5097 
5098    Input Parameters:
5099 +    A - the matrix
5100 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5101 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5102 
5103    Output Parameter:
5104 .    A_loc - the local sequential matrix generated
5105 
5106     Level: developer
5107 
5108 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5109 
5110 @*/
5111 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5112 {
5113   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5114   PetscErrorCode ierr;
5115   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5116   IS             isrowa,iscola;
5117   Mat            *aloc;
5118   PetscBool      match;
5119 
5120   PetscFunctionBegin;
5121   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5122   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5123   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5124   if (!row) {
5125     start = A->rmap->rstart; end = A->rmap->rend;
5126     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5127   } else {
5128     isrowa = *row;
5129   }
5130   if (!col) {
5131     start = A->cmap->rstart;
5132     cmap  = a->garray;
5133     nzA   = a->A->cmap->n;
5134     nzB   = a->B->cmap->n;
5135     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5136     ncols = 0;
5137     for (i=0; i<nzB; i++) {
5138       if (cmap[i] < start) idx[ncols++] = cmap[i];
5139       else break;
5140     }
5141     imark = i;
5142     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5143     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5144     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5145   } else {
5146     iscola = *col;
5147   }
5148   if (scall != MAT_INITIAL_MATRIX) {
5149     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5150     aloc[0] = *A_loc;
5151   }
5152   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5153   *A_loc = aloc[0];
5154   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5155   if (!row) {
5156     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5157   }
5158   if (!col) {
5159     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5160   }
5161   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5162   PetscFunctionReturn(0);
5163 }
5164 
5165 #undef __FUNCT__
5166 #define __FUNCT__ "MatGetBrowsOfAcols"
5167 /*@C
5168     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5169 
5170     Collective on Mat
5171 
5172    Input Parameters:
5173 +    A,B - the matrices in mpiaij format
5174 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5175 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5176 
5177    Output Parameter:
5178 +    rowb, colb - index sets of rows and columns of B to extract
5179 -    B_seq - the sequential matrix generated
5180 
5181     Level: developer
5182 
5183 @*/
5184 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5185 {
5186   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5187   PetscErrorCode ierr;
5188   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5189   IS             isrowb,iscolb;
5190   Mat            *bseq=PETSC_NULL;
5191 
5192   PetscFunctionBegin;
5193   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5194     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5195   }
5196   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5197 
5198   if (scall == MAT_INITIAL_MATRIX) {
5199     start = A->cmap->rstart;
5200     cmap  = a->garray;
5201     nzA   = a->A->cmap->n;
5202     nzB   = a->B->cmap->n;
5203     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5204     ncols = 0;
5205     for (i=0; i<nzB; i++) {  /* row < local row index */
5206       if (cmap[i] < start) idx[ncols++] = cmap[i];
5207       else break;
5208     }
5209     imark = i;
5210     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5211     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5212     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5213     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5214   } else {
5215     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5216     isrowb  = *rowb; iscolb = *colb;
5217     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5218     bseq[0] = *B_seq;
5219   }
5220   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5221   *B_seq = bseq[0];
5222   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5223   if (!rowb) {
5224     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5225   } else {
5226     *rowb = isrowb;
5227   }
5228   if (!colb) {
5229     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5230   } else {
5231     *colb = iscolb;
5232   }
5233   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5234   PetscFunctionReturn(0);
5235 }
5236 
5237 #undef __FUNCT__
5238 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5239 /*
5240     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5241     of the OFF-DIAGONAL portion of local A
5242 
5243     Collective on Mat
5244 
5245    Input Parameters:
5246 +    A,B - the matrices in mpiaij format
5247 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5248 
5249    Output Parameter:
5250 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5251 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5252 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5253 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5254 
5255     Level: developer
5256 
5257 */
5258 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5259 {
5260   VecScatter_MPI_General *gen_to,*gen_from;
5261   PetscErrorCode         ierr;
5262   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5263   Mat_SeqAIJ             *b_oth;
5264   VecScatter             ctx =a->Mvctx;
5265   MPI_Comm               comm=((PetscObject)ctx)->comm;
5266   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5267   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5268   PetscScalar            *rvalues,*svalues;
5269   MatScalar              *b_otha,*bufa,*bufA;
5270   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5271   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5272   MPI_Status             *sstatus,rstatus;
5273   PetscMPIInt            jj;
5274   PetscInt               *cols,sbs,rbs;
5275   PetscScalar            *vals;
5276 
5277   PetscFunctionBegin;
5278   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5279     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5280   }
5281   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5282   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5283 
5284   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5285   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5286   rvalues  = gen_from->values; /* holds the length of receiving row */
5287   svalues  = gen_to->values;   /* holds the length of sending row */
5288   nrecvs   = gen_from->n;
5289   nsends   = gen_to->n;
5290 
5291   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5292   srow    = gen_to->indices;    /* local row index to be sent */
5293   sstarts = gen_to->starts;
5294   sprocs  = gen_to->procs;
5295   sstatus = gen_to->sstatus;
5296   sbs     = gen_to->bs;
5297   rstarts = gen_from->starts;
5298   rprocs  = gen_from->procs;
5299   rbs     = gen_from->bs;
5300 
5301   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5302   if (scall == MAT_INITIAL_MATRIX) {
5303     /* i-array */
5304     /*---------*/
5305     /*  post receives */
5306     for (i=0; i<nrecvs; i++) {
5307       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5308       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5309       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5310     }
5311 
5312     /* pack the outgoing message */
5313     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5314 
5315     sstartsj[0] = 0;
5316     rstartsj[0] = 0;
5317     len         = 0; /* total length of j or a array to be sent */
5318     k           = 0;
5319     for (i=0; i<nsends; i++) {
5320       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5321       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5322       for (j=0; j<nrows; j++) {
5323         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5324         for (l=0; l<sbs; l++) {
5325           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5326 
5327           rowlen[j*sbs+l] = ncols;
5328 
5329           len += ncols;
5330           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5331         }
5332         k++;
5333       }
5334       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5335 
5336       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5337     }
5338     /* recvs and sends of i-array are completed */
5339     i = nrecvs;
5340     while (i--) {
5341       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5342     }
5343     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5344 
5345     /* allocate buffers for sending j and a arrays */
5346     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5347     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5348 
5349     /* create i-array of B_oth */
5350     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5351 
5352     b_othi[0] = 0;
5353     len       = 0; /* total length of j or a array to be received */
5354     k         = 0;
5355     for (i=0; i<nrecvs; i++) {
5356       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5357       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5358       for (j=0; j<nrows; j++) {
5359         b_othi[k+1] = b_othi[k] + rowlen[j];
5360         len        += rowlen[j]; k++;
5361       }
5362       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5363     }
5364 
5365     /* allocate space for j and a arrrays of B_oth */
5366     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5367     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5368 
5369     /* j-array */
5370     /*---------*/
5371     /*  post receives of j-array */
5372     for (i=0; i<nrecvs; i++) {
5373       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5374       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5375     }
5376 
5377     /* pack the outgoing message j-array */
5378     k = 0;
5379     for (i=0; i<nsends; i++) {
5380       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5381       bufJ  = bufj+sstartsj[i];
5382       for (j=0; j<nrows; j++) {
5383         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5384         for (ll=0; ll<sbs; ll++) {
5385           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5386           for (l=0; l<ncols; l++) {
5387             *bufJ++ = cols[l];
5388           }
5389           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5390         }
5391       }
5392       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5393     }
5394 
5395     /* recvs and sends of j-array are completed */
5396     i = nrecvs;
5397     while (i--) {
5398       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5399     }
5400     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5401   } else if (scall == MAT_REUSE_MATRIX) {
5402     sstartsj = *startsj_s;
5403     rstartsj = *startsj_r;
5404     bufa     = *bufa_ptr;
5405     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5406     b_otha   = b_oth->a;
5407   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5408 
5409   /* a-array */
5410   /*---------*/
5411   /*  post receives of a-array */
5412   for (i=0; i<nrecvs; i++) {
5413     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5414     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5415   }
5416 
5417   /* pack the outgoing message a-array */
5418   k = 0;
5419   for (i=0; i<nsends; i++) {
5420     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5421     bufA  = bufa+sstartsj[i];
5422     for (j=0; j<nrows; j++) {
5423       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5424       for (ll=0; ll<sbs; ll++) {
5425         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5426         for (l=0; l<ncols; l++) {
5427           *bufA++ = vals[l];
5428         }
5429         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5430       }
5431     }
5432     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5433   }
5434   /* recvs and sends of a-array are completed */
5435   i = nrecvs;
5436   while (i--) {
5437     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5438   }
5439   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5440   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5441 
5442   if (scall == MAT_INITIAL_MATRIX) {
5443     /* put together the new matrix */
5444     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5445 
5446     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5447     /* Since these are PETSc arrays, change flags to free them as necessary. */
5448     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5449     b_oth->free_a  = PETSC_TRUE;
5450     b_oth->free_ij = PETSC_TRUE;
5451     b_oth->nonew   = 0;
5452 
5453     ierr = PetscFree(bufj);CHKERRQ(ierr);
5454     if (!startsj_s || !bufa_ptr) {
5455       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5456       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5457     } else {
5458       *startsj_s = sstartsj;
5459       *startsj_r = rstartsj;
5460       *bufa_ptr  = bufa;
5461     }
5462   }
5463   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5464   PetscFunctionReturn(0);
5465 }
5466 
5467 #undef __FUNCT__
5468 #define __FUNCT__ "MatGetCommunicationStructs"
5469 /*@C
5470   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5471 
5472   Not Collective
5473 
5474   Input Parameters:
5475 . A - The matrix in mpiaij format
5476 
5477   Output Parameter:
5478 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5479 . colmap - A map from global column index to local index into lvec
5480 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5481 
5482   Level: developer
5483 
5484 @*/
5485 #if defined(PETSC_USE_CTABLE)
5486 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5487 #else
5488 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5489 #endif
5490 {
5491   Mat_MPIAIJ *a;
5492 
5493   PetscFunctionBegin;
5494   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5495   PetscValidPointer(lvec, 2);
5496   PetscValidPointer(colmap, 3);
5497   PetscValidPointer(multScatter, 4);
5498   a = (Mat_MPIAIJ*) A->data;
5499   if (lvec) *lvec = a->lvec;
5500   if (colmap) *colmap = a->colmap;
5501   if (multScatter) *multScatter = a->Mvctx;
5502   PetscFunctionReturn(0);
5503 }
5504 
5505 EXTERN_C_BEGIN
5506 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5507 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5508 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5509 EXTERN_C_END
5510 
5511 #undef __FUNCT__
5512 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5513 /*
5514     Computes (B'*A')' since computing B*A directly is untenable
5515 
5516                n                       p                          p
5517         (              )       (              )         (                  )
5518       m (      A       )  *  n (       B      )   =   m (         C        )
5519         (              )       (              )         (                  )
5520 
5521 */
5522 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5523 {
5524   PetscErrorCode ierr;
5525   Mat            At,Bt,Ct;
5526 
5527   PetscFunctionBegin;
5528   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5529   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5530   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5531   ierr = MatDestroy(&At);CHKERRQ(ierr);
5532   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5533   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5534   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5535   PetscFunctionReturn(0);
5536 }
5537 
5538 #undef __FUNCT__
5539 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5540 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5541 {
5542   PetscErrorCode ierr;
5543   PetscInt       m=A->rmap->n,n=B->cmap->n;
5544   Mat            Cmat;
5545 
5546   PetscFunctionBegin;
5547   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5548   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5549   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5550   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5551   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5552   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5553   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5554   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5555 
5556   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5557 
5558   *C = Cmat;
5559   PetscFunctionReturn(0);
5560 }
5561 
5562 /* ----------------------------------------------------------------*/
5563 #undef __FUNCT__
5564 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5565 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5566 {
5567   PetscErrorCode ierr;
5568 
5569   PetscFunctionBegin;
5570   if (scall == MAT_INITIAL_MATRIX) {
5571     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5572   }
5573   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5574   PetscFunctionReturn(0);
5575 }
5576 
5577 EXTERN_C_BEGIN
5578 #if defined(PETSC_HAVE_MUMPS)
5579 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5580 #endif
5581 #if defined(PETSC_HAVE_PASTIX)
5582 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5583 #endif
5584 #if defined(PETSC_HAVE_SUPERLU_DIST)
5585 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5586 #endif
5587 #if defined(PETSC_HAVE_CLIQUE)
5588 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5589 #endif
5590 EXTERN_C_END
5591 
5592 /*MC
5593    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5594 
5595    Options Database Keys:
5596 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5597 
5598   Level: beginner
5599 
5600 .seealso: MatCreateAIJ()
5601 M*/
5602 
5603 EXTERN_C_BEGIN
5604 #undef __FUNCT__
5605 #define __FUNCT__ "MatCreate_MPIAIJ"
5606 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5607 {
5608   Mat_MPIAIJ     *b;
5609   PetscErrorCode ierr;
5610   PetscMPIInt    size;
5611 
5612   PetscFunctionBegin;
5613   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5614 
5615   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5616   B->data       = (void*)b;
5617   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5618   B->assembled  = PETSC_FALSE;
5619   B->insertmode = NOT_SET_VALUES;
5620   b->size       = size;
5621 
5622   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5623 
5624   /* build cache for off array entries formed */
5625   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5626 
5627   b->donotstash  = PETSC_FALSE;
5628   b->colmap      = 0;
5629   b->garray      = 0;
5630   b->roworiented = PETSC_TRUE;
5631 
5632   /* stuff used for matrix vector multiply */
5633   b->lvec  = PETSC_NULL;
5634   b->Mvctx = PETSC_NULL;
5635 
5636   /* stuff for MatGetRow() */
5637   b->rowindices   = 0;
5638   b->rowvalues    = 0;
5639   b->getrowactive = PETSC_FALSE;
5640 
5641   /* flexible pointer used in CUSP/CUSPARSE classes */
5642   b->spptr = PETSC_NULL;
5643 
5644 #if defined(PETSC_HAVE_MUMPS)
5645   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5646                                            "MatGetFactor_aij_mumps",
5647                                            MatGetFactor_aij_mumps);CHKERRQ(ierr);
5648 #endif
5649 #if defined(PETSC_HAVE_PASTIX)
5650   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5651                                            "MatGetFactor_mpiaij_pastix",
5652                                            MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5653 #endif
5654 #if defined(PETSC_HAVE_SUPERLU_DIST)
5655   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5656                                            "MatGetFactor_mpiaij_superlu_dist",
5657                                            MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5658 #endif
5659 #if defined(PETSC_HAVE_CLIQUE)
5660   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5661                                            "MatGetFactor_aij_clique",
5662                                            MatGetFactor_aij_clique);CHKERRQ(ierr);
5663 #endif
5664   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5665                                            "MatStoreValues_MPIAIJ",
5666                                            MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5667   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5668                                            "MatRetrieveValues_MPIAIJ",
5669                                            MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5670   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5671                                            "MatGetDiagonalBlock_MPIAIJ",
5672                                            MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5673   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5674                                            "MatIsTranspose_MPIAIJ",
5675                                            MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5676   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5677                                            "MatMPIAIJSetPreallocation_MPIAIJ",
5678                                            MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5679   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5680                                            "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5681                                            MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5682   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5683                                            "MatDiagonalScaleLocal_MPIAIJ",
5684                                            MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5685   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5686                                            "MatConvert_MPIAIJ_MPIAIJPERM",
5687                                            MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5688   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5689                                            "MatConvert_MPIAIJ_MPIAIJCRL",
5690                                            MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5691   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5692                                            "MatConvert_MPIAIJ_MPISBAIJ",
5693                                            MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5694   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5695                                            "MatMatMult_MPIDense_MPIAIJ",
5696                                            MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5697   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5698                                            "MatMatMultSymbolic_MPIDense_MPIAIJ",
5699                                            MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5700   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5701                                            "MatMatMultNumeric_MPIDense_MPIAIJ",
5702                                            MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5703   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5704   PetscFunctionReturn(0);
5705 }
5706 EXTERN_C_END
5707 
5708 #undef __FUNCT__
5709 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5710 /*@
5711      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5712          and "off-diagonal" part of the matrix in CSR format.
5713 
5714    Collective on MPI_Comm
5715 
5716    Input Parameters:
5717 +  comm - MPI communicator
5718 .  m - number of local rows (Cannot be PETSC_DECIDE)
5719 .  n - This value should be the same as the local size used in creating the
5720        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5721        calculated if N is given) For square matrices n is almost always m.
5722 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5723 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5724 .   i - row indices for "diagonal" portion of matrix
5725 .   j - column indices
5726 .   a - matrix values
5727 .   oi - row indices for "off-diagonal" portion of matrix
5728 .   oj - column indices
5729 -   oa - matrix values
5730 
5731    Output Parameter:
5732 .   mat - the matrix
5733 
5734    Level: advanced
5735 
5736    Notes:
5737        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5738        must free the arrays once the matrix has been destroyed and not before.
5739 
5740        The i and j indices are 0 based
5741 
5742        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5743 
5744        This sets local rows and cannot be used to set off-processor values.
5745 
5746        You cannot later use MatSetValues() to change values in this matrix.
5747 
5748 .keywords: matrix, aij, compressed row, sparse, parallel
5749 
5750 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5751           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5752 @*/
5753 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5754 {
5755   PetscErrorCode ierr;
5756   Mat_MPIAIJ     *maij;
5757 
5758   PetscFunctionBegin;
5759   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5760   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5761   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5762   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5763   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5764   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5765   maij = (Mat_MPIAIJ*) (*mat)->data;
5766 
5767   maij->donotstash     = PETSC_TRUE;
5768   (*mat)->preallocated = PETSC_TRUE;
5769 
5770   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5771   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5772 
5773   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5774   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5775 
5776   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5777   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5778   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5779   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5780 
5781   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5782   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5783   PetscFunctionReturn(0);
5784 }
5785 
5786 /*
5787     Special version for direct calls from Fortran
5788 */
5789 #include <petsc-private/fortranimpl.h>
5790 
5791 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5792 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5793 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5794 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5795 #endif
5796 
5797 /* Change these macros so can be used in void function */
5798 #undef CHKERRQ
5799 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5800 #undef SETERRQ2
5801 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5802 #undef SETERRQ3
5803 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5804 #undef SETERRQ
5805 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5806 
5807 EXTERN_C_BEGIN
5808 #undef __FUNCT__
5809 #define __FUNCT__ "matsetvaluesmpiaij_"
5810 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5811 {
5812   Mat            mat  = *mmat;
5813   PetscInt       m    = *mm, n = *mn;
5814   InsertMode     addv = *maddv;
5815   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5816   PetscScalar    value;
5817   PetscErrorCode ierr;
5818 
5819   MatCheckPreallocated(mat,1);
5820   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5821 
5822 #if defined(PETSC_USE_DEBUG)
5823   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5824 #endif
5825   {
5826     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5827     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5828     PetscBool roworiented = aij->roworiented;
5829 
5830     /* Some Variables required in the macro */
5831     Mat        A                 = aij->A;
5832     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5833     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5834     MatScalar  *aa               = a->a;
5835     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5836     Mat        B                 = aij->B;
5837     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5838     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5839     MatScalar  *ba               = b->a;
5840 
5841     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5842     PetscInt  nonew = a->nonew;
5843     MatScalar *ap1,*ap2;
5844 
5845     PetscFunctionBegin;
5846     for (i=0; i<m; i++) {
5847       if (im[i] < 0) continue;
5848 #if defined(PETSC_USE_DEBUG)
5849       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5850 #endif
5851       if (im[i] >= rstart && im[i] < rend) {
5852         row      = im[i] - rstart;
5853         lastcol1 = -1;
5854         rp1      = aj + ai[row];
5855         ap1      = aa + ai[row];
5856         rmax1    = aimax[row];
5857         nrow1    = ailen[row];
5858         low1     = 0;
5859         high1    = nrow1;
5860         lastcol2 = -1;
5861         rp2      = bj + bi[row];
5862         ap2      = ba + bi[row];
5863         rmax2    = bimax[row];
5864         nrow2    = bilen[row];
5865         low2     = 0;
5866         high2    = nrow2;
5867 
5868         for (j=0; j<n; j++) {
5869           if (roworiented) value = v[i*n+j];
5870           else value = v[i+j*m];
5871           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5872           if (in[j] >= cstart && in[j] < cend) {
5873             col = in[j] - cstart;
5874             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5875           } else if (in[j] < 0) continue;
5876 #if defined(PETSC_USE_DEBUG)
5877           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5878 #endif
5879           else {
5880             if (mat->was_assembled) {
5881               if (!aij->colmap) {
5882                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5883               }
5884 #if defined(PETSC_USE_CTABLE)
5885               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5886               col--;
5887 #else
5888               col = aij->colmap[in[j]] - 1;
5889 #endif
5890               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5891                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5892                 col  =  in[j];
5893                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5894                 B     = aij->B;
5895                 b     = (Mat_SeqAIJ*)B->data;
5896                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5897                 rp2   = bj + bi[row];
5898                 ap2   = ba + bi[row];
5899                 rmax2 = bimax[row];
5900                 nrow2 = bilen[row];
5901                 low2  = 0;
5902                 high2 = nrow2;
5903                 bm    = aij->B->rmap->n;
5904                 ba    = b->a;
5905               }
5906             } else col = in[j];
5907             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5908           }
5909         }
5910       } else if (!aij->donotstash) {
5911         if (roworiented) {
5912           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5913         } else {
5914           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5915         }
5916       }
5917     }
5918   }
5919   PetscFunctionReturnVoid();
5920 }
5921 EXTERN_C_END
5922 
5923