xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 047240e14af00aad1ef65e96f6fface8924f7f7e)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity; a usage sketch follows this block.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. This type also automatically switches over to use inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
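/*
   A minimal usage sketch (an illustration only, not part of this file): create a MATAIJ matrix and
   call both preallocation routines so the same code works with one process or many.  The
   communicator comm, the global size n, and the per-row nonzero estimates (5 diagonal,
   2 off-diagonal) are illustrative assumptions.  MatSeqAIJSetPreallocation() takes effect on a
   single-process communicator and MatMPIAIJSetPreallocation() on a multi-process one; the call
   that does not match the actual type is simply ignored.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/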
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia        = a->i;
58   ib        = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75 ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt  = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99 ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr   = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
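  /* Accumulate each column's contribution in a work array of global length: columns of the
     diagonal block A are offset by cmap->rstart, columns of the off-diagonal block B are mapped
     back to global indices through garray; the per-process results are then combined with an
     MPI_Allreduce (MAX for the infinity norm, SUM for the 1- and 2-norms). */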
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. The code is lifted from
176     MatLoad_MPIAIJ(); the lack of reuse is unfortunate, and this should eventually become a routine for each matrix type.
177 
178     Only for square matrices.
179 */
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
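  /* Process 0 holds the sequential matrix: with MAT_INITIAL_MATRIX it broadcasts the block sizes,
     then sends every other process its row lengths, column indices, and numerical values; each
     process computes its diagonal/off-diagonal preallocation and inserts its rows with
     MatSetValues().  With MAT_REUSE_MATRIX only the numerical values are re-sent and copied
     directly into the existing A (diagonal) and B (off-diagonal) blocks. */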
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     if (!rank) {
204       bses[0] = gmat->rmap->bs;
205       bses[1] = gmat->cmap->bs;
206     }
207     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
208     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
209     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
210     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
211     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
212     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
213 
214     rowners[0] = 0;
215     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
216     rstart = rowners[rank];
217     rend   = rowners[rank+1];
218     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
219     if (!rank) {
220       gmata = (Mat_SeqAIJ*) gmat->data;
221       /* send row lengths to all processors */
222       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
223       for (i=1; i<size; i++) {
224         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
225       }
226       /* determine the diagonal and off-diagonal entry counts for each row */
227       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
228       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
229       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
230       jj   = 0;
231       for (i=0; i<m; i++) {
232         for (j=0; j<dlens[i]; j++) {
233           if (gmata->j[jj] < rstart) ld[i]++;
234           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
235           jj++;
236         }
237       }
238       /* send column indices to other processes */
239       for (i=1; i<size; i++) {
240         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
241         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
242         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243       }
244 
245       /* send numerical values to other processes */
246       for (i=1; i<size; i++) {
247         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
248         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
249       }
250       gmataa = gmata->a;
251       gmataj = gmata->j;
252 
253     } else {
254       /* receive row lengths */
255       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       /* receive column indices */
257       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
258       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
259       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
260       /* determine the diagonal and off-diagonal entry counts for each row */
261       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
262       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
263       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
264       jj   = 0;
265       for (i=0; i<m; i++) {
266         for (j=0; j<dlens[i]; j++) {
267           if (gmataj[jj] < rstart) ld[i]++;
268           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
269           jj++;
270         }
271       }
272       /* receive numerical values */
273       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
274       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
275     }
276     /* set preallocation */
277     for (i=0; i<m; i++) {
278       dlens[i] -= olens[i];
279     }
280     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
281     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
282 
283     for (i=0; i<m; i++) {
284       dlens[i] += olens[i];
285     }
286     cnt = 0;
287     for (i=0; i<m; i++) {
288       row  = rstart + i;
289       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
290       cnt += dlens[i];
291     }
292     if (rank) {
293       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
294     }
295     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
296     ierr = PetscFree(rowners);CHKERRQ(ierr);
297 
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299 
300     *inmat = mat;
301   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
302     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
303     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
304     mat  = *inmat;
305     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
306     if (!rank) {
307       /* send numerical values to other processes */
308       gmata  = (Mat_SeqAIJ*) gmat->data;
309       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
310       gmataa = gmata->a;
311       for (i=1; i<size; i++) {
312         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
313         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
314       }
315       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
316     } else {
317       /* receive numerical values from process 0 */
318       nz   = Ad->nz + Ao->nz;
319       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
320       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
321     }
322     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
323     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
324     ad = Ad->a;
325     ao = Ao->a;
326     if (mat->rmap->n) {
327       i  = 0;
328       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
329       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
330     }
331     for (i=1; i<mat->rmap->n; i++) {
332       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
333       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
334     }
335     i--;
336     if (mat->rmap->n) {
337       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
338     }
339     if (rank) {
340       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
341     }
342   }
343   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
345   CHKMEMQ;
346   PetscFunctionReturn(0);
347 }
348 
349 /*
350   Local utility routine that creates a mapping from the global column
351 number to the local number in the off-diagonal part of the local
352 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
353 a slightly higher hash table cost; without it, it is not scalable (each processor
354 has an order N integer array, but access is fast).
355 */
356 #undef __FUNCT__
357 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
358 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
359 {
360   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
361   PetscErrorCode ierr;
362   PetscInt       n = aij->B->cmap->n,i;
363 
364   PetscFunctionBegin;
365   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
366 #if defined(PETSC_USE_CTABLE)
367   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
368   for (i=0; i<n; i++) {
369     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
370   }
371 #else
372   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
373   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
374   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
375   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
376 #endif
377   PetscFunctionReturn(0);
378 }
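/*
   A sketch of how the colmap built above is consulted elsewhere (the same pattern appears in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()); gcol and lcol are illustrative names only.
   Entries are stored shifted by one so that zero can mean "this global column has no entry in the
   off-diagonal block on this process":

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
     if (lcol < 0) { ... global column gcol is not present in B ... }
*/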
379 
380 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
381 { \
382     if (col <= lastcol1)  low1 = 0;     \
383     else                 high1 = nrow1; \
384     lastcol1 = col;\
385     while (high1-low1 > 5) { \
386       t = (low1+high1)/2; \
387       if (rp1[t] > col) high1 = t; \
388       else              low1  = t; \
389     } \
390       for (_i=low1; _i<high1; _i++) { \
391         if (rp1[_i] > col) break; \
392         if (rp1[_i] == col) { \
393           if (addv == ADD_VALUES) ap1[_i] += value;   \
394           else                    ap1[_i] = value; \
395           goto a_noinsert; \
396         } \
397       }  \
398       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
399       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
400       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
401       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
402       N = nrow1++ - 1; a->nz++; high1++; \
403       /* shift up all the later entries in this row */ \
404       for (ii=N; ii>=_i; ii--) { \
405         rp1[ii+1] = rp1[ii]; \
406         ap1[ii+1] = ap1[ii]; \
407       } \
408       rp1[_i] = col;  \
409       ap1[_i] = value;  \
410       a_noinsert: ; \
411       ailen[row] = nrow1; \
412 }
413 
414 
415 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
416   { \
417     if (col <= lastcol2) low2 = 0;                        \
418     else high2 = nrow2;                                   \
419     lastcol2 = col;                                       \
420     while (high2-low2 > 5) {                              \
421       t = (low2+high2)/2;                                 \
422       if (rp2[t] > col) high2 = t;                        \
423       else             low2  = t;                         \
424     }                                                     \
425     for (_i=low2; _i<high2; _i++) {                       \
426       if (rp2[_i] > col) break;                           \
427       if (rp2[_i] == col) {                               \
428         if (addv == ADD_VALUES) ap2[_i] += value;         \
429         else                    ap2[_i] = value;          \
430         goto b_noinsert;                                  \
431       }                                                   \
432     }                                                     \
433     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
434     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
435     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
436     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
437     N = nrow2++ - 1; b->nz++; high2++;                    \
438     /* shift up all the later entries in this row */      \
439     for (ii=N; ii>=_i; ii--) {                            \
440       rp2[ii+1] = rp2[ii];                                \
441       ap2[ii+1] = ap2[ii];                                \
442     }                                                     \
443     rp2[_i] = col;                                        \
444     ap2[_i] = value;                                      \
445     b_noinsert: ;                                         \
446     bilen[row] = nrow2;                                   \
447   }
448 
449 #undef __FUNCT__
450 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
451 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
452 {
453   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
454   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
455   PetscErrorCode ierr;
456   PetscInt       l,*garray = mat->garray,diag;
457 
458   PetscFunctionBegin;
459   /* code only works for square matrices A */
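  /* v is assumed to hold the entire local row in increasing global column order, matching the
     existing nonzero pattern exactly: first the off-diagonal entries left of the diagonal block,
     then the diagonal block entries, then the remaining off-diagonal entries. */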
460 
461   /* find size of row to the left of the diagonal part */
462   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
463   row  = row - diag;
464   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
465     if (garray[b->j[b->i[row]+l]] > diag) break;
466   }
467   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* diagonal part */
470   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
471 
472   /* right of diagonal part */
473   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
474   PetscFunctionReturn(0);
475 }
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatSetValues_MPIAIJ"
479 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
480 {
481   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
482   PetscScalar    value;
483   PetscErrorCode ierr;
484   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
485   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
486   PetscBool      roworiented = aij->roworiented;
487 
488   /* Some Variables required in the macro */
489   Mat        A                 = aij->A;
490   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
491   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
492   MatScalar  *aa               = a->a;
493   PetscBool  ignorezeroentries = a->ignorezeroentries;
494   Mat        B                 = aij->B;
495   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
496   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
497   MatScalar  *ba               = b->a;
498 
499   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
500   PetscInt  nonew;
501   MatScalar *ap1,*ap2;
502 
503   PetscFunctionBegin;
504   if (v) PetscValidScalarPointer(v,6);
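  /* Rows owned by this process are inserted directly: columns inside the diagonal block go into
     aij->A, all other columns go into the off-diagonal block aij->B (translated through colmap once
     the matrix has been assembled).  Rows owned by other processes are stashed and communicated
     during MatAssemblyBegin/End(). */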
505   for (i=0; i<m; i++) {
506     if (im[i] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
509 #endif
510     if (im[i] >= rstart && im[i] < rend) {
511       row      = im[i] - rstart;
512       lastcol1 = -1;
513       rp1      = aj + ai[row];
514       ap1      = aa + ai[row];
515       rmax1    = aimax[row];
516       nrow1    = ailen[row];
517       low1     = 0;
518       high1    = nrow1;
519       lastcol2 = -1;
520       rp2      = bj + bi[row];
521       ap2      = ba + bi[row];
522       rmax2    = bimax[row];
523       nrow2    = bilen[row];
524       low2     = 0;
525       high2    = nrow2;
526 
527       for (j=0; j<n; j++) {
528         if (v) {
529           if (roworiented) value = v[i*n+j];
530           else             value = v[i+j*m];
531         } else value = 0.0;
532         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
533         if (in[j] >= cstart && in[j] < cend) {
534           col   = in[j] - cstart;
535           nonew = a->nonew;
536           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
537         } else if (in[j] < 0) continue;
538 #if defined(PETSC_USE_DEBUG)
539         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
540 #endif
541         else {
542           if (mat->was_assembled) {
543             if (!aij->colmap) {
544               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
545             }
546 #if defined(PETSC_USE_CTABLE)
547             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
548             col--;
549 #else
550             col = aij->colmap[in[j]] - 1;
551 #endif
552             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
553               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
554               col  =  in[j];
555               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
556               B     = aij->B;
557               b     = (Mat_SeqAIJ*)B->data;
558               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
559               rp2   = bj + bi[row];
560               ap2   = ba + bi[row];
561               rmax2 = bimax[row];
562               nrow2 = bilen[row];
563               low2  = 0;
564               high2 = nrow2;
565               bm    = aij->B->rmap->n;
566               ba    = b->a;
567             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
568           } else col = in[j];
569           nonew = b->nonew;
570           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
571         }
572       }
573     } else {
574       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
575       if (!aij->donotstash) {
576         mat->assembled = PETSC_FALSE;
577         if (roworiented) {
578           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
579         } else {
580           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         }
582       }
583     }
584   }
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNCT__
589 #define __FUNCT__ "MatGetValues_MPIAIJ"
590 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
591 {
592   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
593   PetscErrorCode ierr;
594   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
595   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
596 
597   PetscFunctionBegin;
598   for (i=0; i<m; i++) {
599     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
600     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
601     if (idxm[i] >= rstart && idxm[i] < rend) {
602       row = idxm[i] - rstart;
603       for (j=0; j<n; j++) {
604         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
605         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
606         if (idxn[j] >= cstart && idxn[j] < cend) {
607           col  = idxn[j] - cstart;
608           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
609         } else {
610           if (!aij->colmap) {
611             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
612           }
613 #if defined(PETSC_USE_CTABLE)
614           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
615           col--;
616 #else
617           col = aij->colmap[idxn[j]] - 1;
618 #endif
619           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
620           else {
621             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
622           }
623         }
624       }
625     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
626   }
627   PetscFunctionReturn(0);
628 }
629 
630 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
631 
632 #undef __FUNCT__
633 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
634 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
635 {
636   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
637   PetscErrorCode ierr;
638   PetscInt       nstash,reallocs;
639   InsertMode     addv;
640 
641   PetscFunctionBegin;
642   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
643 
644   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
645   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
646   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
647   mat->insertmode = addv; /* in case this processor had no cache */
648 
649   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
650   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
651   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 
655 #undef __FUNCT__
656 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
657 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
658 {
659   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
660   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
661   PetscErrorCode ierr;
662   PetscMPIInt    n;
663   PetscInt       i,j,rstart,ncols,flg;
664   PetscInt       *row,*col;
665   PetscBool      other_disassembled;
666   PetscScalar    *val;
667   InsertMode     addv = mat->insertmode;
668 
669   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
670 
671   PetscFunctionBegin;
672   if (!aij->donotstash && !mat->nooffprocentries) {
673     while (1) {
674       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
675       if (!flg) break;
676 
677       for (i=0; i<n; ) {
678         /* Now identify the consecutive vals belonging to the same row */
679         for (j=i,rstart=row[j]; j<n; j++) {
680           if (row[j] != rstart) break;
681         }
682         if (j < n) ncols = j-i;
683         else       ncols = n-i;
684         /* Now assemble all these values with a single function call */
685         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
686 
687         i = j;
688       }
689     }
690     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
691   }
692   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
693   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
694 
695   /* determine if any processor has disassembled; if so we must
696      also disassemble ourselves, in order that we may reassemble. */
697   /*
698      if the nonzero structure of submatrix B cannot change then we know that
699      no processor has disassembled, and thus we can skip this step
700   */
701   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
702     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
703     if (mat->was_assembled && !other_disassembled) {
704       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
705     }
706   }
707   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
708     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
709   }
710   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
711   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
712   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
713   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
714 
715   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
716 
717   aij->rowvalues = 0;
718 
719   /* used by MatAXPY() */
720   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
721   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
722 
723   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
724   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNCT__
729 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
730 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
731 {
732   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
733   PetscErrorCode ierr;
734 
735   PetscFunctionBegin;
736   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
737   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
738   PetscFunctionReturn(0);
739 }
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatZeroRows_MPIAIJ"
743 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
744 {
745   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
746   PetscErrorCode    ierr;
747   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
748   PetscInt          i,*owners = A->rmap->range;
749   PetscInt          *nprocs,j,idx,nsends,row;
750   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
751   PetscInt          *rvalues,count,base,slen,*source;
752   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
753   MPI_Comm          comm = ((PetscObject)A)->comm;
754   MPI_Request       *send_waits,*recv_waits;
755   MPI_Status        recv_status,*send_status;
756   const PetscScalar *xx;
757   PetscScalar       *bb;
758 #if defined(PETSC_DEBUG)
759   PetscBool found = PETSC_FALSE;
760 #endif
761 
762   PetscFunctionBegin;
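  /* Each process forwards the global row numbers it wants zeroed to the owning processes; the
     owner collects them into lrows, optionally fixes the right-hand side b from x, and then zeroes
     the local rows of the off-diagonal block B and the diagonal block A (placing diag on the
     diagonal when requested). */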
763   /*  first count number of contributors to each processor */
764   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
765   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
766   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
767   j    = 0;
768   for (i=0; i<N; i++) {
769     if (lastidx > (idx = rows[i])) j = 0;
770     lastidx = idx;
771     for (; j<size; j++) {
772       if (idx >= owners[j] && idx < owners[j+1]) {
773         nprocs[2*j]++;
774         nprocs[2*j+1] = 1;
775         owner[i]      = j;
776 #if defined(PETSC_DEBUG)
777         found = PETSC_TRUE;
778 #endif
779         break;
780       }
781     }
782 #if defined(PETSC_DEBUG)
783     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
784     found = PETSC_FALSE;
785 #endif
786   }
787   nsends = 0;
788   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
789 
790   if (A->nooffproczerorows) {
791     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
792     nrecvs = nsends;
793     nmax   = N;
794   } else {
795     /* inform other processors of the number of messages and the max length */
796     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
797   }
798 
799   /* post receives:   */
800   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
801   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
802   for (i=0; i<nrecvs; i++) {
803     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
804   }
805 
806   /* do sends:
807       1) starts[i] gives the starting index in svalues for stuff going to
808          the ith processor
809   */
810   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
811   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
812   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
813 
814   starts[0] = 0;
815   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
816   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
817 
818   starts[0] = 0;
819   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
820   count = 0;
821   for (i=0; i<size; i++) {
822     if (nprocs[2*i+1]) {
823       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
824     }
825   }
826   ierr = PetscFree(starts);CHKERRQ(ierr);
827 
828   base = owners[rank];
829 
830   /*  wait on receives */
831   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
832   count = nrecvs; slen = 0;
833   while (count) {
834     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
835     /* unpack receives into our local space */
836     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
837 
838     source[imdex] = recv_status.MPI_SOURCE;
839     lens[imdex]   = n;
840     slen         += n;
841     count--;
842   }
843   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
844 
845   /* move the data into the send scatter */
846   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
847   count = 0;
848   for (i=0; i<nrecvs; i++) {
849     values = rvalues + i*nmax;
850     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
851   }
852   ierr = PetscFree(rvalues);CHKERRQ(ierr);
853   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
854   ierr = PetscFree(owner);CHKERRQ(ierr);
855   ierr = PetscFree(nprocs);CHKERRQ(ierr);
856 
857   /* fix right hand side if needed */
858   if (x && b) {
859     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
860     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
861     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
862     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
864   }
865   /*
866         Zero the required rows. If the "diagonal block" of the matrix
867      is square and the user wishes to set the diagonal, we use separate
868      code so that MatSetValues() is not called for each diagonal entry; doing so would allocate
869      new memory each time, triggering many mallocs and slowing things down.
870 
871   */
872   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
873   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
874   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
875     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
876   } else if (diag != 0.0) {
877     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
878     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
879     for (i = 0; i < slen; i++) {
880       row  = lrows[i] + rstart;
881       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
882     }
883     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
884     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
885   } else {
886     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
887   }
888   ierr = PetscFree(lrows);CHKERRQ(ierr);
889 
890   /* wait on sends */
891   if (nsends) {
892     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
893     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
894     ierr = PetscFree(send_status);CHKERRQ(ierr);
895   }
896   ierr = PetscFree(send_waits);CHKERRQ(ierr);
897   ierr = PetscFree(svalues);CHKERRQ(ierr);
898   PetscFunctionReturn(0);
899 }
900 
901 #undef __FUNCT__
902 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
903 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
904 {
905   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
906   PetscErrorCode    ierr;
907   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
908   PetscInt          i,*owners = A->rmap->range;
909   PetscInt          *nprocs,j,idx,nsends;
910   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
911   PetscInt          *rvalues,count,base,slen,*source;
912   PetscInt          *lens,*lrows,*values,m;
913   MPI_Comm          comm = ((PetscObject)A)->comm;
914   MPI_Request       *send_waits,*recv_waits;
915   MPI_Status        recv_status,*send_status;
916   const PetscScalar *xx;
917   PetscScalar       *bb,*mask;
918   Vec               xmask,lmask;
919   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
920   const PetscInt    *aj, *ii,*ridx;
921   PetscScalar       *aa;
922 #if defined(PETSC_DEBUG)
923   PetscBool found = PETSC_FALSE;
924 #endif
925 
926   PetscFunctionBegin;
927   /*  first count number of contributors to each processor */
928   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
929   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
930   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
931   j    = 0;
932   for (i=0; i<N; i++) {
933     if (lastidx > (idx = rows[i])) j = 0;
934     lastidx = idx;
935     for (; j<size; j++) {
936       if (idx >= owners[j] && idx < owners[j+1]) {
937         nprocs[2*j]++;
938         nprocs[2*j+1] = 1;
939         owner[i]      = j;
940 #if defined(PETSC_DEBUG)
941         found = PETSC_TRUE;
942 #endif
943         break;
944       }
945     }
946 #if defined(PETSC_DEBUG)
947     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
948     found = PETSC_FALSE;
949 #endif
950   }
951   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
952 
953   /* inform other processors of the number of messages and the max length */
954   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
955 
956   /* post receives:   */
957   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
958   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
959   for (i=0; i<nrecvs; i++) {
960     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
961   }
962 
963   /* do sends:
964       1) starts[i] gives the starting index in svalues for stuff going to
965          the ith processor
966   */
967   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
968   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
969   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
970 
971   starts[0] = 0;
972   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
973   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
974 
975   starts[0] = 0;
976   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
977   count = 0;
978   for (i=0; i<size; i++) {
979     if (nprocs[2*i+1]) {
980       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
981     }
982   }
983   ierr = PetscFree(starts);CHKERRQ(ierr);
984 
985   base = owners[rank];
986 
987   /*  wait on receives */
988   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
989   count = nrecvs; slen = 0;
990   while (count) {
991     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
992     /* unpack receives into our local space */
993     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
994 
995     source[imdex] = recv_status.MPI_SOURCE;
996     lens[imdex]   = n;
997     slen         += n;
998     count--;
999   }
1000   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1001 
1002   /* move the data into the send scatter */
1003   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1004   count = 0;
1005   for (i=0; i<nrecvs; i++) {
1006     values = rvalues + i*nmax;
1007     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1008   }
1009   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1010   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1011   ierr = PetscFree(owner);CHKERRQ(ierr);
1012   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1013   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1014 
1015   /* zero diagonal part of matrix */
1016   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1017 
1018   /* handle off diagonal part of matrix */
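  /* Build a mask over the off-diagonal columns: put 1 in the locally zeroed rows of a global
     vector and scatter it through Mvctx, so lmask is nonzero exactly for the local columns of B
     whose global column is being eliminated.  Those entries are zeroed below and, when x and b are
     supplied, their product with the corresponding entry of x is first subtracted from b. */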
1019   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1020   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1021   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1022   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1023   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1024   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1025   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1027   if (x) {
1028     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1031     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1032   }
1033   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1034 
1035   /* remove zeroed rows of off diagonal matrix */
1036   ii = aij->i;
1037   for (i=0; i<slen; i++) {
1038     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1039   }
1040 
1041   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1042   if (aij->compressedrow.use) {
1043     m    = aij->compressedrow.nrows;
1044     ii   = aij->compressedrow.i;
1045     ridx = aij->compressedrow.rindex;
1046     for (i=0; i<m; i++) {
1047       n  = ii[i+1] - ii[i];
1048       aj = aij->j + ii[i];
1049       aa = aij->a + ii[i];
1050 
1051       for (j=0; j<n; j++) {
1052         if (PetscAbsScalar(mask[*aj])) {
1053           if (b) bb[*ridx] -= *aa*xx[*aj];
1054           *aa = 0.0;
1055         }
1056         aa++;
1057         aj++;
1058       }
1059       ridx++;
1060     }
1061   } else { /* do not use compressed row format */
1062     m = l->B->rmap->n;
1063     for (i=0; i<m; i++) {
1064       n  = ii[i+1] - ii[i];
1065       aj = aij->j + ii[i];
1066       aa = aij->a + ii[i];
1067       for (j=0; j<n; j++) {
1068         if (PetscAbsScalar(mask[*aj])) {
1069           if (b) bb[i] -= *aa*xx[*aj];
1070           *aa = 0.0;
1071         }
1072         aa++;
1073         aj++;
1074       }
1075     }
1076   }
1077   if (x) {
1078     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1079     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1080   }
1081   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1082   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1083   ierr = PetscFree(lrows);CHKERRQ(ierr);
1084 
1085   /* wait on sends */
1086   if (nsends) {
1087     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1088     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1089     ierr = PetscFree(send_status);CHKERRQ(ierr);
1090   }
1091   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1092   ierr = PetscFree(svalues);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatMult_MPIAIJ"
1098 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101   PetscErrorCode ierr;
1102   PetscInt       nt;
1103 
1104   PetscFunctionBegin;
1105   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1106   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1107   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1108   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1109   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1116 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1117 {
1118   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1119   PetscErrorCode ierr;
1120 
1121   PetscFunctionBegin;
1122   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1128 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1129 {
1130   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1131   PetscErrorCode ierr;
1132 
1133   PetscFunctionBegin;
1134   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1135   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1136   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1137   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1143 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1144 {
1145   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1146   PetscErrorCode ierr;
1147   PetscBool      merged;
1148 
1149   PetscFunctionBegin;
1150   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1151   /* do nondiagonal part */
1152   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1153   if (!merged) {
1154     /* send it on its way */
1155     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1156     /* do local part */
1157     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1158     /* receive remote parts: note this assumes the values are not actually
1159        added into yy until the next line */
1160     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1161   } else {
1162     /* do local part */
1163     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1164     /* send it on its way */
1165     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1166     /* values actually were received in the Begin() but we need to call this nop */
1167     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1168   }
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 EXTERN_C_BEGIN
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1175 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1176 {
1177   MPI_Comm       comm;
1178   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1179   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1180   IS             Me,Notme;
1181   PetscErrorCode ierr;
1182   PetscInt       M,N,first,last,*notme,i;
1183   PetscMPIInt    size;
1184 
1185   PetscFunctionBegin;
1186   /* Easy test: symmetric diagonal block */
1187   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1188   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1189   if (!*f) PetscFunctionReturn(0);
1190   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1191   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1192   if (size == 1) PetscFunctionReturn(0);
1193 
1194   /* Hard test: off-diagonal block. This requires a MatGetSubMatrices() call. */
1195   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1196   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1197   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1198   for (i=0; i<first; i++) notme[i] = i;
1199   for (i=last; i<M; i++) notme[i-last+first] = i;
1200   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1201   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1202   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1203   Aoff = Aoffs[0];
1204   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1205   Boff = Boffs[0];
1206   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1207   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1208   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1209   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1210   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1211   ierr = PetscFree(notme);CHKERRQ(ierr);
1212   PetscFunctionReturn(0);
1213 }
1214 EXTERN_C_END
1215 
1216 #undef __FUNCT__
1217 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1218 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1219 {
1220   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1221   PetscErrorCode ierr;
1222 
1223   PetscFunctionBegin;
1224   /* do nondiagonal part */
1225   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1226   /* send it on its way */
1227   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1228   /* do local part */
1229   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1230   /* receive remote parts */
1231   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 /*
1236   This only works correctly for square matrices where the subblock A->A is the
1237    diagonal block
1238 */
1239 #undef __FUNCT__
1240 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1241 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1242 {
1243   PetscErrorCode ierr;
1244   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1245 
1246   PetscFunctionBegin;
1247   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1248   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1249   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatScale_MPIAIJ"
1255 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1256 {
1257   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1262   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatDestroy_MPIAIJ"
1268 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1269 {
1270   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274 #if defined(PETSC_USE_LOG)
1275   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1276 #endif
1277   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1278   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1279   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1280   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1281 #if defined(PETSC_USE_CTABLE)
1282   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1283 #else
1284   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1285 #endif
1286   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1287   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1288   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1289   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1290   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1291   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1292 
1293   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1307 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1308 {
1309   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1310   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1311   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1312   PetscErrorCode ierr;
1313   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1314   int            fd;
1315   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1316   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1317   PetscScalar    *column_values;
1318   PetscInt       message_count,flowcontrolcount;
1319   FILE           *file;
1320 
1321   PetscFunctionBegin;
1322   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1323   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1324   nz   = A->nz + B->nz;
1325   if (!rank) {
1326     header[0] = MAT_FILE_CLASSID;
1327     header[1] = mat->rmap->N;
1328     header[2] = mat->cmap->N;
1329 
1330     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1331     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1332     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1333     /* get largest number of rows any processor has */
1334     rlen  = mat->rmap->n;
1335     range = mat->rmap->range;
1336     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1337   } else {
1338     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1339     rlen = mat->rmap->n;
1340   }
1341 
1342   /* load up the local row counts */
1343   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1344   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1345 
1346   /* store the row lengths to the file */
1347   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1348   if (!rank) {
1349     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1350     for (i=1; i<size; i++) {
1351       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1352       rlen = range[i+1] - range[i];
1353       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1354       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1355     }
1356     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1357   } else {
1358     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1359     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1360     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1361   }
1362   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1363 
1364   /* load up the local column indices */
1365   nzmax = nz; /* rank 0 needs as much space as the largest process needs; other ranks only need room for their own nonzeros */
1366   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1367   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1368   cnt   = 0;
1369   for (i=0; i<mat->rmap->n; i++) {
1370     for (j=B->i[i]; j<B->i[i+1]; j++) {
1371       if ((col = garray[B->j[j]]) > cstart) break;
1372       column_indices[cnt++] = col;
1373     }
1374     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1375     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1376   }
1377   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1378 
1379   /* store the column indices to the file */
1380   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1381   if (!rank) {
1382     MPI_Status status;
1383     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1384     for (i=1; i<size; i++) {
1385       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1386       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1387       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1388       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1389       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1390     }
1391     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1392   } else {
1393     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1394     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1395     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1396     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1397   }
1398   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1399 
1400   /* load up the local column values */
1401   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1402   cnt  = 0;
1403   for (i=0; i<mat->rmap->n; i++) {
1404     for (j=B->i[i]; j<B->i[i+1]; j++) {
1405       if (garray[B->j[j]] > cstart) break;
1406       column_values[cnt++] = B->a[j];
1407     }
1408     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1409     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1410   }
1411   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1412 
1413   /* store the column values to the file */
1414   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1415   if (!rank) {
1416     MPI_Status status;
1417     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1418     for (i=1; i<size; i++) {
1419       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1420       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1421       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1422       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1423       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1424     }
1425     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1426   } else {
1427     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1428     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1429     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1430     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1431   }
1432   ierr = PetscFree(column_values);CHKERRQ(ierr);
1433 
1434   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1435   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1436   PetscFunctionReturn(0);
1437 }
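
/*
   Illustrative usage sketch (not part of the implementation above). On more than one
   process, viewing a MATMPIAIJ matrix with a binary viewer ends up in
   MatView_MPIAIJ_Binary(); the file name below is hypothetical.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/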
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1441 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1442 {
1443   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1444   PetscErrorCode    ierr;
1445   PetscMPIInt       rank = aij->rank,size = aij->size;
1446   PetscBool         isdraw,iascii,isbinary;
1447   PetscViewer       sviewer;
1448   PetscViewerFormat format;
1449 
1450   PetscFunctionBegin;
1451   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1452   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1453   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1454   if (iascii) {
1455     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1456     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1457       MatInfo   info;
1458       PetscBool inodes;
1459 
1460       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1461       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1462       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
1463       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1464       if (!inodes) {
1465         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1466                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1467       } else {
1468         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1469                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1470       }
1471       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1472       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1473       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1474       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1475       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1476       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1478       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1479       PetscFunctionReturn(0);
1480     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1481       PetscInt inodecount,inodelimit,*inodes;
1482       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1483       if (inodes) {
1484         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1485       } else {
1486         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1487       }
1488       PetscFunctionReturn(0);
1489     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1490       PetscFunctionReturn(0);
1491     }
1492   } else if (isbinary) {
1493     if (size == 1) {
1494       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1495       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1496     } else {
1497       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1498     }
1499     PetscFunctionReturn(0);
1500   } else if (isdraw) {
1501     PetscDraw draw;
1502     PetscBool isnull;
1503     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1504     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1505   }
1506 
1507   if (size == 1) {
1508     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1509     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1510   } else {
1511     /* assemble the entire matrix onto first processor. */
1512     Mat        A;
1513     Mat_SeqAIJ *Aloc;
1514     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1515     MatScalar  *a;
1516 
1517     if (mat->rmap->N > 1024) {
1518       PetscBool flg = PETSC_FALSE;
1519 
1520       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1521       if (!flg) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1522     }
1523 
1524     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1525     if (!rank) {
1526       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1527     } else {
1528       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1529     }
1530     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1531     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1532     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1533     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1534     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1535 
1536     /* copy over the A part */
1537     Aloc = (Mat_SeqAIJ*)aij->A->data;
1538     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1539     row  = mat->rmap->rstart;
1540     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1541     for (i=0; i<m; i++) {
1542       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1543       row++;
1544       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1545     }
1546     aj = Aloc->j;
1547     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1548 
1549     /* copy over the B part */
1550     Aloc = (Mat_SeqAIJ*)aij->B->data;
1551     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1552     row  = mat->rmap->rstart;
1553     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1554     ct   = cols;
1555     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1556     for (i=0; i<m; i++) {
1557       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1558       row++;
1559       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1560     }
1561     ierr = PetscFree(ct);CHKERRQ(ierr);
1562     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1563     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1564     /*
1565        Every process has to participate in drawing the matrix since the graphics waits are
1566        synchronized across all processes that share the PetscDraw object
1567     */
1568     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1569     if (!rank) {
1570       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1571       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1572       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1573       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1574     }
1575     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1576     ierr = MatDestroy(&A);CHKERRQ(ierr);
1577   }
1578   PetscFunctionReturn(0);
1579 }
1580 
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatView_MPIAIJ"
1583 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1584 {
1585   PetscErrorCode ierr;
1586   PetscBool      iascii,isdraw,issocket,isbinary;
1587 
1588   PetscFunctionBegin;
1589   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1590   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1591   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1593   if (iascii || isdraw || isbinary || issocket) {
1594     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1595   }
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 #undef __FUNCT__
1600 #define __FUNCT__ "MatSOR_MPIAIJ"
1601 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1602 {
1603   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1604   PetscErrorCode ierr;
1605   Vec            bb1 = 0;
1606   PetscBool      hasop;
1607 
1608   PetscFunctionBegin;
1609   if (flag == SOR_APPLY_UPPER) {
1610     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1611     PetscFunctionReturn(0);
1612   }
1613 
1614   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1615     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1616   }
1617 
1618   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1619     if (flag & SOR_ZERO_INITIAL_GUESS) {
1620       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1621       its--;
1622     }
1623 
1624     while (its--) {
1625       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1626       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1627 
1628       /* update rhs: bb1 = bb - B*x */
1629       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1630       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1631 
1632       /* local sweep */
1633       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1634     }
1635   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1636     if (flag & SOR_ZERO_INITIAL_GUESS) {
1637       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1638       its--;
1639     }
1640     while (its--) {
1641       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1642       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1643 
1644       /* update rhs: bb1 = bb - B*x */
1645       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1646       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1647 
1648       /* local sweep */
1649       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1650     }
1651   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1652     if (flag & SOR_ZERO_INITIAL_GUESS) {
1653       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1654       its--;
1655     }
1656     while (its--) {
1657       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1658       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1659 
1660       /* update rhs: bb1 = bb - B*x */
1661       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1662       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1663 
1664       /* local sweep */
1665       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1666     }
1667   } else if (flag & SOR_EISENSTAT) {
1668     Vec xx1;
1669 
1670     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1671     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1672 
1673     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1674     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1675     if (!mat->diag) {
1676       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1677       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1678     }
1679     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1680     if (hasop) {
1681       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1682     } else {
1683       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1684     }
1685     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1686 
1687     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1688 
1689     /* local sweep */
1690     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1691     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1692     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1693   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1694 
1695   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1696   PetscFunctionReturn(0);
1697 }
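
/*
   Illustrative usage sketch (not part of the implementation above). MatSOR() on a
   MATMPIAIJ matrix supports only the "local" sweeps and the Eisenstat variant, as the
   code above shows; a direct call for one local symmetric sweep with a zero initial
   guess might look like the following (variable names are hypothetical).

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   In practice this path is usually reached through -pc_type sor.
*/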
1698 
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatPermute_MPIAIJ"
1701 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1702 {
1703   Mat            aA,aB,Aperm;
1704   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1705   PetscScalar    *aa,*ba;
1706   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1707   PetscSF        rowsf,sf;
1708   IS             parcolp = PETSC_NULL;
1709   PetscBool      done;
1710   PetscErrorCode ierr;
1711 
1712   PetscFunctionBegin;
1713   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1714   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1715   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1716   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1717 
1718   /* Invert row permutation to find out where my rows should go */
1719   ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr);
1720   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1721   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1722   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1723   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1724   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1725 
1726   /* Invert column permutation to find out where my columns should go */
1727   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1728   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1729   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1730   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1731   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1732   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1733   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1734 
1735   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1736   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1737   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1738 
1739   /* Find out where my gcols should go */
1740   ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr);
1741   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1742   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1743   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1744   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1745   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1746   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1747   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1748 
1749   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1750   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1751   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1752   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1753   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1754   for (i=0; i<m; i++) {
1755     PetscInt row = rdest[i],rowner;
1756     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1757     for (j=ai[i]; j<ai[i+1]; j++) {
1758       PetscInt cowner,col = cdest[aj[j]];
1759       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1760       if (rowner == cowner) dnnz[i]++;
1761       else onnz[i]++;
1762     }
1763     for (j=bi[i]; j<bi[i+1]; j++) {
1764       PetscInt cowner,col = gcdest[bj[j]];
1765       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1766       if (rowner == cowner) dnnz[i]++;
1767       else onnz[i]++;
1768     }
1769   }
1770   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1771   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1772   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1773   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1774   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1775   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1776   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1777 
1778   ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1779   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1780   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1781   for (i=0; i<m; i++) {
1782     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1783     PetscInt rowlen;
1784     rowlen = ai[i+1] - ai[i];
1785     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1786     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1787     rowlen = bi[i+1] - bi[i];
1788     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1789     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1790   }
1791   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1792   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1793   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1794   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1795   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1796   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1797   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1798   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1799   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1800   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1801   *B = Aperm;
1802   PetscFunctionReturn(0);
1803 }
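
/*
   Illustrative usage sketch (not part of the implementation above). Given index sets
   rowperm and colperm that describe a global permutation (how they are built is up to
   the caller), the permuted matrix is obtained with MatPermute(); names are hypothetical.

     Mat Aperm;
     ierr = MatPermute(A,rowperm,colperm,&Aperm);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/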
1804 
1805 #undef __FUNCT__
1806 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1807 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1808 {
1809   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1810   Mat            A    = mat->A,B = mat->B;
1811   PetscErrorCode ierr;
1812   PetscReal      isend[5],irecv[5];
1813 
1814   PetscFunctionBegin;
1815   info->block_size = 1.0;
1816   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1817 
1818   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1819   isend[3] = info->memory;  isend[4] = info->mallocs;
1820 
1821   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1822 
1823   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1824   isend[3] += info->memory;  isend[4] += info->mallocs;
1825   if (flag == MAT_LOCAL) {
1826     info->nz_used      = isend[0];
1827     info->nz_allocated = isend[1];
1828     info->nz_unneeded  = isend[2];
1829     info->memory       = isend[3];
1830     info->mallocs      = isend[4];
1831   } else if (flag == MAT_GLOBAL_MAX) {
1832     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1833 
1834     info->nz_used      = irecv[0];
1835     info->nz_allocated = irecv[1];
1836     info->nz_unneeded  = irecv[2];
1837     info->memory       = irecv[3];
1838     info->mallocs      = irecv[4];
1839   } else if (flag == MAT_GLOBAL_SUM) {
1840     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1841 
1842     info->nz_used      = irecv[0];
1843     info->nz_allocated = irecv[1];
1844     info->nz_unneeded  = irecv[2];
1845     info->memory       = irecv[3];
1846     info->mallocs      = irecv[4];
1847   }
1848   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1849   info->fill_ratio_needed = 0;
1850   info->factor_mallocs    = 0;
1851   PetscFunctionReturn(0);
1852 }
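
/*
   Illustrative usage sketch (not part of the implementation above). Summing the
   local information over all processes (names are hypothetical):

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(((PetscObject)A)->comm,"nonzeros used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/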
1853 
1854 #undef __FUNCT__
1855 #define __FUNCT__ "MatSetOption_MPIAIJ"
1856 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1857 {
1858   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1859   PetscErrorCode ierr;
1860 
1861   PetscFunctionBegin;
1862   switch (op) {
1863   case MAT_NEW_NONZERO_LOCATIONS:
1864   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1865   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1866   case MAT_KEEP_NONZERO_PATTERN:
1867   case MAT_NEW_NONZERO_LOCATION_ERR:
1868   case MAT_USE_INODES:
1869   case MAT_IGNORE_ZERO_ENTRIES:
1870     MatCheckPreallocated(A,1);
1871     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1872     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1873     break;
1874   case MAT_ROW_ORIENTED:
1875     a->roworiented = flg;
1876 
1877     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1878     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1879     break;
1880   case MAT_NEW_DIAGONALS:
1881     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1882     break;
1883   case MAT_IGNORE_OFF_PROC_ENTRIES:
1884     a->donotstash = flg;
1885     break;
1886   case MAT_SPD:
1887     A->spd_set = PETSC_TRUE;
1888     A->spd     = flg;
1889     if (flg) {
1890       A->symmetric                  = PETSC_TRUE;
1891       A->structurally_symmetric     = PETSC_TRUE;
1892       A->symmetric_set              = PETSC_TRUE;
1893       A->structurally_symmetric_set = PETSC_TRUE;
1894     }
1895     break;
1896   case MAT_SYMMETRIC:
1897     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1898     break;
1899   case MAT_STRUCTURALLY_SYMMETRIC:
1900     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1901     break;
1902   case MAT_HERMITIAN:
1903     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1904     break;
1905   case MAT_SYMMETRY_ETERNAL:
1906     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1907     break;
1908   default:
1909     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1910   }
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatGetRow_MPIAIJ"
1916 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1917 {
1918   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1919   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1920   PetscErrorCode ierr;
1921   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1922   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1923   PetscInt       *cmap,*idx_p;
1924 
1925   PetscFunctionBegin;
1926   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1927   mat->getrowactive = PETSC_TRUE;
1928 
1929   if (!mat->rowvalues && (idx || v)) {
1930     /*
1931         allocate enough space to hold information from the longest row.
1932     */
1933     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1934     PetscInt   max = 1,tmp;
1935     for (i=0; i<matin->rmap->n; i++) {
1936       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1937       if (max < tmp) max = tmp;
1938     }
1939     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1940   }
1941 
1942   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1943   lrow = row - rstart;
1944 
1945   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1946   if (!v)   {pvA = 0; pvB = 0;}
1947   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1948   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1949   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1950   nztot = nzA + nzB;
1951 
1952   cmap = mat->garray;
1953   if (v  || idx) {
1954     if (nztot) {
1955       /* Sort by increasing column numbers, assuming A and B already sorted */
1956       PetscInt imark = -1;
1957       if (v) {
1958         *v = v_p = mat->rowvalues;
1959         for (i=0; i<nzB; i++) {
1960           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1961           else break;
1962         }
1963         imark = i;
1964         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1965         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1966       }
1967       if (idx) {
1968         *idx = idx_p = mat->rowindices;
1969         if (imark > -1) {
1970           for (i=0; i<imark; i++) {
1971             idx_p[i] = cmap[cworkB[i]];
1972           }
1973         } else {
1974           for (i=0; i<nzB; i++) {
1975             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1976             else break;
1977           }
1978           imark = i;
1979         }
1980         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1981         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1982       }
1983     } else {
1984       if (idx) *idx = 0;
1985       if (v)   *v   = 0;
1986     }
1987   }
1988   *nz  = nztot;
1989   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1990   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1991   PetscFunctionReturn(0);
1992 }
1993 
1994 #undef __FUNCT__
1995 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1996 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1997 {
1998   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1999 
2000   PetscFunctionBegin;
2001   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2002   aij->getrowactive = PETSC_FALSE;
2003   PetscFunctionReturn(0);
2004 }
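
/*
   Illustrative usage sketch (not part of the implementation above). Only locally owned
   rows may be requested, and each MatGetRow() must be matched by MatRestoreRow() before
   the next row is accessed (names are hypothetical).

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/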
2005 
2006 #undef __FUNCT__
2007 #define __FUNCT__ "MatNorm_MPIAIJ"
2008 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2009 {
2010   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2011   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2012   PetscErrorCode ierr;
2013   PetscInt       i,j,cstart = mat->cmap->rstart;
2014   PetscReal      sum = 0.0;
2015   MatScalar      *v;
2016 
2017   PetscFunctionBegin;
2018   if (aij->size == 1) {
2019     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2020   } else {
2021     if (type == NORM_FROBENIUS) {
2022       v = amat->a;
2023       for (i=0; i<amat->nz; i++) {
2024         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2025       }
2026       v = bmat->a;
2027       for (i=0; i<bmat->nz; i++) {
2028         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2029       }
2030       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2031       *norm = PetscSqrtReal(*norm);
2032     } else if (type == NORM_1) { /* max column norm */
2033       PetscReal *tmp,*tmp2;
2034       PetscInt  *jj,*garray = aij->garray;
2035       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2036       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2037       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2038       *norm = 0.0;
2039       v     = amat->a; jj = amat->j;
2040       for (j=0; j<amat->nz; j++) {
2041         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2042       }
2043       v = bmat->a; jj = bmat->j;
2044       for (j=0; j<bmat->nz; j++) {
2045         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2046       }
2047       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2048       for (j=0; j<mat->cmap->N; j++) {
2049         if (tmp2[j] > *norm) *norm = tmp2[j];
2050       }
2051       ierr = PetscFree(tmp);CHKERRQ(ierr);
2052       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2053     } else if (type == NORM_INFINITY) { /* max row norm */
2054       PetscReal ntemp = 0.0;
2055       for (j=0; j<aij->A->rmap->n; j++) {
2056         v   = amat->a + amat->i[j];
2057         sum = 0.0;
2058         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2059           sum += PetscAbsScalar(*v); v++;
2060         }
2061         v = bmat->a + bmat->i[j];
2062         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2063           sum += PetscAbsScalar(*v); v++;
2064         }
2065         if (sum > ntemp) ntemp = sum;
2066       }
2067       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2068     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2069   }
2070   PetscFunctionReturn(0);
2071 }
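
/*
   Illustrative usage sketch (not part of the implementation above). The norms supported
   for MATMPIAIJ are NORM_1, NORM_FROBENIUS and NORM_INFINITY:

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/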
2072 
2073 #undef __FUNCT__
2074 #define __FUNCT__ "MatTranspose_MPIAIJ"
2075 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2076 {
2077   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2078   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2079   PetscErrorCode ierr;
2080   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2081   PetscInt       cstart = A->cmap->rstart,ncol;
2082   Mat            B;
2083   MatScalar      *array;
2084 
2085   PetscFunctionBegin;
2086   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2087 
2088   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2089   ai = Aloc->i; aj = Aloc->j;
2090   bi = Bloc->i; bj = Bloc->j;
2091   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2092     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2093     PetscSFNode          *oloc;
2094     PETSC_UNUSED PetscSF sf;
2095 
2096     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2097     /* compute d_nnz for preallocation */
2098     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2099     for (i=0; i<ai[ma]; i++) {
2100       d_nnz[aj[i]]++;
2101       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2102     }
2103     /* compute local off-diagonal contributions */
2104     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2105     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2106     /* map those to global */
2107     ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
2108     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2109     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2110     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2111     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2112     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2113     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2114 
2115     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2116     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2117     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2118     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2119     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2120     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2121   } else {
2122     B    = *matout;
2123     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2124     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2125   }
2126 
2127   /* copy over the A part */
2128   array = Aloc->a;
2129   row   = A->rmap->rstart;
2130   for (i=0; i<ma; i++) {
2131     ncol = ai[i+1]-ai[i];
2132     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2133     row++;
2134     array += ncol; aj += ncol;
2135   }
2136   aj = Aloc->j;
2137   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
2138 
2139   /* copy over the B part */
2140   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2141   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2142   array = Bloc->a;
2143   row   = A->rmap->rstart;
2144   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2145   cols_tmp = cols;
2146   for (i=0; i<mb; i++) {
2147     ncol = bi[i+1]-bi[i];
2148     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2149     row++;
2150     array += ncol; cols_tmp += ncol;
2151   }
2152   ierr = PetscFree(cols);CHKERRQ(ierr);
2153 
2154   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2155   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2156   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2157     *matout = B;
2158   } else {
2159     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2160   }
2161   PetscFunctionReturn(0);
2162 }
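
/*
   Illustrative usage sketch (not part of the implementation above). A new transpose is
   created with MAT_INITIAL_MATRIX; a later transpose with the same nonzero pattern can
   reuse it with MAT_REUSE_MATRIX (names are hypothetical).

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/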
2163 
2164 #undef __FUNCT__
2165 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2166 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2167 {
2168   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2169   Mat            a    = aij->A,b = aij->B;
2170   PetscErrorCode ierr;
2171   PetscInt       s1,s2,s3;
2172 
2173   PetscFunctionBegin;
2174   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2175   if (rr) {
2176     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2177     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2178     /* Overlap communication with computation. */
2179     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2180   }
2181   if (ll) {
2182     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2183     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2184     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2185   }
2186   /* scale  the diagonal block */
2187   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2188 
2189   if (rr) {
2190     /* Do a scatter end and then right scale the off-diagonal block */
2191     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2192     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2193   }
2194   PetscFunctionReturn(0);
2195 }
2196 
2197 #undef __FUNCT__
2198 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2199 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2200 {
2201   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2202   PetscErrorCode ierr;
2203 
2204   PetscFunctionBegin;
2205   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2206   PetscFunctionReturn(0);
2207 }
2208 
2209 #undef __FUNCT__
2210 #define __FUNCT__ "MatEqual_MPIAIJ"
2211 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2212 {
2213   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2214   Mat            a,b,c,d;
2215   PetscBool      flg;
2216   PetscErrorCode ierr;
2217 
2218   PetscFunctionBegin;
2219   a = matA->A; b = matA->B;
2220   c = matB->A; d = matB->B;
2221 
2222   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2223   if (flg) {
2224     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2225   }
2226   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2227   PetscFunctionReturn(0);
2228 }
2229 
2230 #undef __FUNCT__
2231 #define __FUNCT__ "MatCopy_MPIAIJ"
2232 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2233 {
2234   PetscErrorCode ierr;
2235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2236   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2237 
2238   PetscFunctionBegin;
2239   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2240   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2241     /* because of the column compression in the off-processor part of the matrix a->B,
2242        the number of columns in a->B and b->B may differ, hence we cannot call
2243        MatCopy() directly on the two parts. If need be, a copy more efficient than
2244        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2245        and then copying the submatrices */
2246     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2247   } else {
2248     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2249     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2250   }
2251   PetscFunctionReturn(0);
2252 }
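
/*
   Illustrative usage sketch (not part of the implementation above). The fast path above
   is taken only when both matrices share the nonzero pattern and copy implementation; a
   common way to obtain such a pair is MatDuplicate() followed by MatCopy() (names are
   hypothetical).

     Mat B;
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/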
2253 
2254 #undef __FUNCT__
2255 #define __FUNCT__ "MatSetUp_MPIAIJ"
2256 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2257 {
2258   PetscErrorCode ierr;
2259 
2260   PetscFunctionBegin;
2261   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2262   PetscFunctionReturn(0);
2263 }
2264 
2265 #undef __FUNCT__
2266 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2267 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2268 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2269 {
2270   PetscInt       i,m=Y->rmap->N;
2271   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2272   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2273   const PetscInt *xi = x->i,*yi = y->i;
2274 
2275   PetscFunctionBegin;
2276   /* Set the number of nonzeros in the new matrix */
2277   for (i=0; i<m; i++) {
2278     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2279     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2280     nnz[i] = 0;
2281     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2282       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2283       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2284       nnz[i]++;
2285     }
2286     for (; k<nzy; k++) nnz[i]++;
2287   }
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatAXPY_MPIAIJ"
2293 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2294 {
2295   PetscErrorCode ierr;
2296   PetscInt       i;
2297   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2298   PetscBLASInt   bnz,one=1;
2299   Mat_SeqAIJ     *x,*y;
2300 
2301   PetscFunctionBegin;
2302   if (str == SAME_NONZERO_PATTERN) {
2303     PetscScalar alpha = a;
2304     x    = (Mat_SeqAIJ*)xx->A->data;
2305     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2306     y    = (Mat_SeqAIJ*)yy->A->data;
2307     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2308     x    = (Mat_SeqAIJ*)xx->B->data;
2309     y    = (Mat_SeqAIJ*)yy->B->data;
2310     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2311     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2312   } else if (str == SUBSET_NONZERO_PATTERN) {
2313     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2314 
2315     x = (Mat_SeqAIJ*)xx->B->data;
2316     y = (Mat_SeqAIJ*)yy->B->data;
2317     if (y->xtoy && y->XtoY != xx->B) {
2318       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2319       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2320     }
2321     if (!y->xtoy) { /* get xtoy */
2322       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2323       y->XtoY = xx->B;
2324       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2325     }
2326     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2327   } else {
2328     Mat      B;
2329     PetscInt *nnz_d,*nnz_o;
2330     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2331     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2332     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2333     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2334     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2335     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2336     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2337     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2338     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2339     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2340     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2341     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2342     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2343     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2344   }
2345   PetscFunctionReturn(0);
2346 }
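
/*
   Illustrative usage sketch (not part of the implementation above). The structure flag
   selects one of the three branches above; DIFFERENT_NONZERO_PATTERN is always safe but
   rebuilds Y, while SAME_NONZERO_PATTERN reduces to two BLAS axpy calls.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/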
2347 
2348 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2349 
2350 #undef __FUNCT__
2351 #define __FUNCT__ "MatConjugate_MPIAIJ"
2352 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2353 {
2354 #if defined(PETSC_USE_COMPLEX)
2355   PetscErrorCode ierr;
2356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2357 
2358   PetscFunctionBegin;
2359   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2360   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2361 #else
2362   PetscFunctionBegin;
2363 #endif
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 #undef __FUNCT__
2368 #define __FUNCT__ "MatRealPart_MPIAIJ"
2369 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2370 {
2371   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2372   PetscErrorCode ierr;
2373 
2374   PetscFunctionBegin;
2375   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2376   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 #undef __FUNCT__
2381 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2382 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2383 {
2384   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2385   PetscErrorCode ierr;
2386 
2387   PetscFunctionBegin;
2388   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2389   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 #if defined(PETSC_HAVE_PBGL)
2394 
2395 #include <boost/parallel/mpi/bsp_process_group.hpp>
2396 #include <boost/graph/distributed/ilu_default_graph.hpp>
2397 #include <boost/graph/distributed/ilu_0_block.hpp>
2398 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2399 #include <boost/graph/distributed/petsc/interface.hpp>
2400 #include <boost/multi_array.hpp>
2401 #include <boost/parallel/distributed_property_map.hpp>
2402 
2403 #undef __FUNCT__
2404 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2405 /*
2406   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2407 */
2408 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2409 {
2410   namespace petsc = boost::distributed::petsc;
2411 
2412   namespace graph_dist = boost::graph::distributed;
2413   using boost::graph::distributed::ilu_default::process_group_type;
2414   using boost::graph::ilu_permuted;
2415 
2416   PetscBool      row_identity, col_identity;
2417   PetscContainer c;
2418   PetscInt       m, n, M, N;
2419   PetscErrorCode ierr;
2420 
2421   PetscFunctionBegin;
2422   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2423   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2424   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2425   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2426 
2427   process_group_type pg;
2428   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2429   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2430   lgraph_type& level_graph = *lgraph_p;
2431   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2432 
2433   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2434   ilu_permuted(level_graph);
2435 
2436   /* put together the new matrix */
2437   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2438   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2439   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2440   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2441   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2442   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2443   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2444   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2445 
2446   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2447   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2448   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2449   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2455 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2456 {
2457   PetscFunctionBegin;
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 #undef __FUNCT__
2462 #define __FUNCT__ "MatSolve_MPIAIJ"
2463 /*
2464   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2465 */
2466 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2467 {
2468   namespace graph_dist = boost::graph::distributed;
2469 
2470   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2471   lgraph_type    *lgraph_p;
2472   PetscContainer c;
2473   PetscErrorCode ierr;
2474 
2475   PetscFunctionBegin;
2476   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2477   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2478   ierr = VecCopy(b, x);CHKERRQ(ierr);
2479 
2480   PetscScalar *array_x;
2481   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2482   PetscInt sx;
2483   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2484 
2485   PetscScalar *array_b;
2486   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2487   PetscInt sb;
2488   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2489 
2490   lgraph_type& level_graph = *lgraph_p;
2491   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2492 
2493   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2494   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2495   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2496 
2497   typedef boost::iterator_property_map<array_ref_type::iterator,
2498                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2499   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2500   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2501 
2502   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2503   PetscFunctionReturn(0);
2504 }
2505 #endif
2506 
2507 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2508   PetscInt    nzlocal,nsends,nrecvs;
2509   PetscMPIInt *send_rank,*recv_rank;
2510   PetscInt    *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2511   PetscScalar *sbuf_a,**rbuf_a;
2512   PetscErrorCode (*Destroy)(Mat);
2513 } Mat_Redundant;
2514 
2515 #undef __FUNCT__
2516 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2517 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2518 {
2519   PetscErrorCode ierr;
2520   Mat_Redundant  *redund=(Mat_Redundant*)ptr;
2521   PetscInt       i;
2522 
2523   PetscFunctionBegin;
2524   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2525   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2526   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2527   for (i=0; i<redund->nrecvs; i++) {
2528     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2529     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2530   }
2531   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2532   ierr = PetscFree(redund);CHKERRQ(ierr);
2533   PetscFunctionReturn(0);
2534 }
2535 
2536 #undef __FUNCT__
2537 #define __FUNCT__ "MatDestroy_MatRedundant"
2538 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2539 {
2540   PetscErrorCode ierr;
2541   PetscContainer container;
2542   Mat_Redundant  *redund=PETSC_NULL;
2543 
2544   PetscFunctionBegin;
2545   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2546   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2547   ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2548 
2549   A->ops->destroy = redund->Destroy;
2550 
2551   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2552   if (A->ops->destroy) {
2553     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2554   }
2555   PetscFunctionReturn(0);
2556 }
2557 
2558 #undef __FUNCT__
2559 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2560 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2561 {
2562   PetscMPIInt    rank,size;
2563   MPI_Comm       comm=((PetscObject)mat)->comm;
2564   PetscErrorCode ierr;
2565   PetscInt       nsends    = 0,nrecvs=0,i,rownz_max=0;
2566   PetscMPIInt    *send_rank= PETSC_NULL,*recv_rank=PETSC_NULL;
2567   PetscInt       *rowrange = mat->rmap->range;
2568   Mat_MPIAIJ     *aij      = (Mat_MPIAIJ*)mat->data;
2569   Mat            A         = aij->A,B=aij->B,C=*matredundant;
2570   Mat_SeqAIJ     *a        = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2571   PetscScalar    *sbuf_a;
2572   PetscInt       nzlocal=a->nz+b->nz;
2573   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2574   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2575   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2576   MatScalar      *aworkA,*aworkB;
2577   PetscScalar    *vals;
2578   PetscMPIInt    tag1,tag2,tag3,imdex;
2579   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL;
2580   MPI_Request    *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2581   MPI_Status     recv_status,*send_status;
2582   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2583   PetscInt       **rbuf_j=PETSC_NULL;
2584   PetscScalar    **rbuf_a=PETSC_NULL;
2585   Mat_Redundant  *redund =PETSC_NULL;
2586   PetscContainer container;
2587 
2588   PetscFunctionBegin;
2589   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2590   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2591 
2592   if (reuse == MAT_REUSE_MATRIX) {
2593     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2594     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2595     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2596     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2597     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2598     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2599     ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2600     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2601 
2602     nsends    = redund->nsends;
2603     nrecvs    = redund->nrecvs;
2604     send_rank = redund->send_rank;
2605     recv_rank = redund->recv_rank;
2606     sbuf_nz   = redund->sbuf_nz;
2607     rbuf_nz   = redund->rbuf_nz;
2608     sbuf_j    = redund->sbuf_j;
2609     sbuf_a    = redund->sbuf_a;
2610     rbuf_j    = redund->rbuf_j;
2611     rbuf_a    = redund->rbuf_a;
2612   }
2613 
2614   if (reuse == MAT_INITIAL_MATRIX) {
2615     PetscMPIInt subrank,subsize;
2616     PetscInt    nleftover,np_subcomm;
2617     /* get the destination processors' id send_rank, nsends and nrecvs */
2618     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2619     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2620     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2621 
2622     np_subcomm = size/nsubcomm;
2623     nleftover  = size - nsubcomm*np_subcomm;
2624 
2625     nsends = 0; nrecvs = 0;
2626     for (i=0; i<size; i++) { /* i=rank*/
2627       if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */
2628         send_rank[nsends]   = i; nsends++;
2629         recv_rank[nrecvs++] = i;
2630       }
2631     }
2632     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2633       i = size-nleftover-1;
2634       j = 0;
2635       while (j < nsubcomm - nleftover) {
2636         send_rank[nsends++] = i;
2637         i--; j++;
2638       }
2639     }
2640 
2641     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2642       for (i=0; i<nleftover; i++) {
2643         recv_rank[nrecvs++] = size-nleftover+i;
2644       }
2645     }
2646 
2647     /* allocate sbuf_j, sbuf_a */
2648     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2649     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2650     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2651   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2652 
2653   /* copy mat's local entries into the buffers */
2654   if (reuse == MAT_INITIAL_MATRIX) {
2655     rownz_max = 0;
2656     rptr      = sbuf_j;
2657     cols      = sbuf_j + rend-rstart + 1;
2658     vals      = sbuf_a;
2659     rptr[0]   = 0;
2660     for (i=0; i<rend-rstart; i++) {
2661       row    = i + rstart;
2662       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2663       ncols  = nzA + nzB;
2664       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2665       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2666       /* load the column indices for this row into cols */
2667       lwrite = 0;
2668       for (l=0; l<nzB; l++) {
2669         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2670           vals[lwrite]   = aworkB[l];
2671           cols[lwrite++] = ctmp;
2672         }
2673       }
2674       for (l=0; l<nzA; l++) {
2675         vals[lwrite]   = aworkA[l];
2676         cols[lwrite++] = cstart + cworkA[l];
2677       }
2678       for (l=0; l<nzB; l++) {
2679         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2680           vals[lwrite]   = aworkB[l];
2681           cols[lwrite++] = ctmp;
2682         }
2683       }
2684       vals     += ncols;
2685       cols     += ncols;
2686       rptr[i+1] = rptr[i] + ncols;
2687       if (rownz_max < ncols) rownz_max = ncols;
2688     }
2689     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2690   } else { /* only copy matrix values into sbuf_a */
2691     rptr    = sbuf_j;
2692     vals    = sbuf_a;
2693     rptr[0] = 0;
2694     for (i=0; i<rend-rstart; i++) {
2695       row    = i + rstart;
2696       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2697       ncols  = nzA + nzB;
2698       cworkB = b->j + b->i[i];
2699       aworkA = a->a + a->i[i];
2700       aworkB = b->a + b->i[i];
2701       lwrite = 0;
2702       for (l=0; l<nzB; l++) {
2703         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2704       }
2705       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2706       for (l=0; l<nzB; l++) {
2707         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2708       }
2709       vals     += ncols;
2710       rptr[i+1] = rptr[i] + ncols;
2711     }
2712   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2713 
2714   /* send nzlocal to others, and recv other's nzlocal */
2715   /*--------------------------------------------------*/
2716   if (reuse == MAT_INITIAL_MATRIX) {
2717     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2718 
2719     s_waits2 = s_waits3 + nsends;
2720     s_waits1 = s_waits2 + nsends;
2721     r_waits1 = s_waits1 + nsends;
2722     r_waits2 = r_waits1 + nrecvs;
2723     r_waits3 = r_waits2 + nrecvs;
2724   } else {
2725     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2726 
2727     r_waits3 = s_waits3 + nsends;
2728   }
2729 
2730   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2731   if (reuse == MAT_INITIAL_MATRIX) {
2732     /* get new tags to keep the communication clean */
2733     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2734     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2735     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2736 
2737     /* post receives of other's nzlocal */
2738     for (i=0; i<nrecvs; i++) {
2739       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2740     }
2741     /* send nzlocal to others */
2742     for (i=0; i<nsends; i++) {
2743       sbuf_nz[i] = nzlocal;
2744       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2745     }
2746     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2747     count = nrecvs;
2748     while (count) {
2749       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2750 
2751       recv_rank[imdex] = recv_status.MPI_SOURCE;
2752       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2753       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2754 
2755       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2756 
2757       rbuf_nz[imdex] += i + 2;
2758 
2759       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2760       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2761       count--;
2762     }
2763     /* wait on sends of nzlocal */
2764     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2765     /* send mat->i,j to others, and recv from others */
2766     /*------------------------------------------------*/
2767     for (i=0; i<nsends; i++) {
2768       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2769       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2770     }
2771     /* wait on receives of mat->i,j */
2772     /*------------------------------*/
2773     count = nrecvs;
2774     while (count) {
2775       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2776       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2777       count--;
2778     }
2779     /* wait on sends of mat->i,j */
2780     /*---------------------------*/
2781     if (nsends) {
2782       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2783     }
2784   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2785 
2786   /* post receives, send and receive mat->a */
2787   /*----------------------------------------*/
2788   for (imdex=0; imdex<nrecvs; imdex++) {
2789     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2790   }
2791   for (i=0; i<nsends; i++) {
2792     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2793   }
2794   count = nrecvs;
2795   while (count) {
2796     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2797     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2798     count--;
2799   }
2800   if (nsends) {
2801     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2802   }
2803 
2804   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2805 
2806   /* create redundant matrix */
2807   /*-------------------------*/
2808   if (reuse == MAT_INITIAL_MATRIX) {
2809     /* compute rownz_max for preallocation */
2810     for (imdex=0; imdex<nrecvs; imdex++) {
2811       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2812       rptr = rbuf_j[imdex];
2813       for (i=0; i<j; i++) {
2814         ncols = rptr[i+1] - rptr[i];
2815         if (rownz_max < ncols) rownz_max = ncols;
2816       }
2817     }
2818 
2819     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2820     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2821     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2822     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2823     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2824     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2825   } else {
2826     C = *matredundant;
2827   }
2828 
2829   /* insert local matrix entries */
2830   rptr = sbuf_j;
2831   cols = sbuf_j + rend-rstart + 1;
2832   vals = sbuf_a;
2833   for (i=0; i<rend-rstart; i++) {
2834     row   = i + rstart;
2835     ncols = rptr[i+1] - rptr[i];
2836     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2837     vals += ncols;
2838     cols += ncols;
2839   }
2840   /* insert received matrix entries */
2841   for (imdex=0; imdex<nrecvs; imdex++) {
2842     rstart = rowrange[recv_rank[imdex]];
2843     rend   = rowrange[recv_rank[imdex]+1];
2844     rptr   = rbuf_j[imdex];
2845     cols   = rbuf_j[imdex] + rend-rstart + 1;
2846     vals   = rbuf_a[imdex];
2847     for (i=0; i<rend-rstart; i++) {
2848       row   = i + rstart;
2849       ncols = rptr[i+1] - rptr[i];
2850       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2851       vals += ncols;
2852       cols += ncols;
2853     }
2854   }
2855   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2856   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2857   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2858   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2859   if (reuse == MAT_INITIAL_MATRIX) {
2860     PetscContainer container;
2861     *matredundant = C;
2862     /* create a supporting struct and attach it to C for reuse */
2863     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2864     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2865     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2866     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2867     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2868     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2869 
2870     redund->nzlocal   = nzlocal;
2871     redund->nsends    = nsends;
2872     redund->nrecvs    = nrecvs;
2873     redund->send_rank = send_rank;
2874     redund->recv_rank = recv_rank;
2875     redund->sbuf_nz   = sbuf_nz;
2876     redund->rbuf_nz   = rbuf_nz;
2877     redund->sbuf_j    = sbuf_j;
2878     redund->sbuf_a    = sbuf_a;
2879     redund->rbuf_j    = rbuf_j;
2880     redund->rbuf_a    = rbuf_a;
2881 
2882     redund->Destroy = C->ops->destroy;
2883     C->ops->destroy = MatDestroy_MatRedundant;
2884   }
2885   PetscFunctionReturn(0);
2886 }
2887 
2888 #undef __FUNCT__
2889 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
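/*
   MatGetRowMaxAbs_MPIAIJ (and the MinAbs/Min/Max variants below) work block-wise: compute the
   row-wise extremum of the local diagonal block a->A and of the off-diagonal block a->B, then
   keep the entrywise winner of the two; requested column indices are translated to global
   numbering (cmap->rstart offset for the diagonal block, garray[] for the off-diagonal block).
*/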
2890 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2891 {
2892   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2893   PetscErrorCode ierr;
2894   PetscInt       i,*idxb = 0;
2895   PetscScalar    *va,*vb;
2896   Vec            vtmp;
2897 
2898   PetscFunctionBegin;
2899   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2900   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2901   if (idx) {
2902     for (i=0; i<A->rmap->n; i++) {
2903       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2904     }
2905   }
2906 
2907   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2908   if (idx) {
2909     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2910   }
2911   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2912   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2913 
2914   for (i=0; i<A->rmap->n; i++) {
2915     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2916       va[i] = vb[i];
2917       if (idx) idx[i] = a->garray[idxb[i]];
2918     }
2919   }
2920 
2921   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2922   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2923   ierr = PetscFree(idxb);CHKERRQ(ierr);
2924   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2925   PetscFunctionReturn(0);
2926 }
2927 
2928 #undef __FUNCT__
2929 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2930 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2931 {
2932   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2933   PetscErrorCode ierr;
2934   PetscInt       i,*idxb = 0;
2935   PetscScalar    *va,*vb;
2936   Vec            vtmp;
2937 
2938   PetscFunctionBegin;
2939   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2940   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2941   if (idx) {
2942     for (i=0; i<A->rmap->n; i++) {
2943       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2944     }
2945   }
2946 
2947   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2948   if (idx) {
2949     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2950   }
2951   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2952   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2953 
2954   for (i=0; i<A->rmap->n; i++) {
2955     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2956       va[i] = vb[i];
2957       if (idx) idx[i] = a->garray[idxb[i]];
2958     }
2959   }
2960 
2961   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2962   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2963   ierr = PetscFree(idxb);CHKERRQ(ierr);
2964   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2965   PetscFunctionReturn(0);
2966 }
2967 
2968 #undef __FUNCT__
2969 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2970 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2971 {
2972   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2973   PetscInt       n      = A->rmap->n;
2974   PetscInt       cstart = A->cmap->rstart;
2975   PetscInt       *cmap  = mat->garray;
2976   PetscInt       *diagIdx, *offdiagIdx;
2977   Vec            diagV, offdiagV;
2978   PetscScalar    *a, *diagA, *offdiagA;
2979   PetscInt       r;
2980   PetscErrorCode ierr;
2981 
2982   PetscFunctionBegin;
2983   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2984   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2985   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2986   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2987   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2988   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2989   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2990   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2991   for (r = 0; r < n; ++r) {
2992     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2993       a[r]   = diagA[r];
2994       idx[r] = cstart + diagIdx[r];
2995     } else {
2996       a[r]   = offdiagA[r];
2997       idx[r] = cmap[offdiagIdx[r]];
2998     }
2999   }
3000   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3001   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3002   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3003   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3004   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3005   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3006   PetscFunctionReturn(0);
3007 }
3008 
3009 #undef __FUNCT__
3010 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3011 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3012 {
3013   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3014   PetscInt       n      = A->rmap->n;
3015   PetscInt       cstart = A->cmap->rstart;
3016   PetscInt       *cmap  = mat->garray;
3017   PetscInt       *diagIdx, *offdiagIdx;
3018   Vec            diagV, offdiagV;
3019   PetscScalar    *a, *diagA, *offdiagA;
3020   PetscInt       r;
3021   PetscErrorCode ierr;
3022 
3023   PetscFunctionBegin;
3024   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3025   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3026   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3027   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3028   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3029   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3030   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3031   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3032   for (r = 0; r < n; ++r) {
3033     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3034       a[r]   = diagA[r];
3035       idx[r] = cstart + diagIdx[r];
3036     } else {
3037       a[r]   = offdiagA[r];
3038       idx[r] = cmap[offdiagIdx[r]];
3039     }
3040   }
3041   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3042   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3043   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3044   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3045   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3046   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3047   PetscFunctionReturn(0);
3048 }
3049 
3050 #undef __FUNCT__
3051 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3052 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3053 {
3054   PetscErrorCode ierr;
3055   Mat            *dummy;
3056 
3057   PetscFunctionBegin;
3058   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3059   *newmat = *dummy;
3060   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3061   PetscFunctionReturn(0);
3062 }
3063 
3064 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3065 
3066 #undef __FUNCT__
3067 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3068 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3069 {
3070   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3071   PetscErrorCode ierr;
3072 
3073   PetscFunctionBegin;
3074   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3075   PetscFunctionReturn(0);
3076 }
3077 
3078 #undef __FUNCT__
3079 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3080 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3081 {
3082   PetscErrorCode ierr;
3083   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3084 
3085   PetscFunctionBegin;
3086   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3087   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3088   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3089   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3090   PetscFunctionReturn(0);
3091 }
3092 
3093 /* -------------------------------------------------------------------*/
3094 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3095        MatGetRow_MPIAIJ,
3096        MatRestoreRow_MPIAIJ,
3097        MatMult_MPIAIJ,
3098 /* 4*/ MatMultAdd_MPIAIJ,
3099        MatMultTranspose_MPIAIJ,
3100        MatMultTransposeAdd_MPIAIJ,
3101 #if defined(PETSC_HAVE_PBGL)
3102        MatSolve_MPIAIJ,
3103 #else
3104        0,
3105 #endif
3106        0,
3107        0,
3108 /*10*/ 0,
3109        0,
3110        0,
3111        MatSOR_MPIAIJ,
3112        MatTranspose_MPIAIJ,
3113 /*15*/ MatGetInfo_MPIAIJ,
3114        MatEqual_MPIAIJ,
3115        MatGetDiagonal_MPIAIJ,
3116        MatDiagonalScale_MPIAIJ,
3117        MatNorm_MPIAIJ,
3118 /*20*/ MatAssemblyBegin_MPIAIJ,
3119        MatAssemblyEnd_MPIAIJ,
3120        MatSetOption_MPIAIJ,
3121        MatZeroEntries_MPIAIJ,
3122 /*24*/ MatZeroRows_MPIAIJ,
3123        0,
3124 #if defined(PETSC_HAVE_PBGL)
3125        0,
3126 #else
3127        0,
3128 #endif
3129        0,
3130        0,
3131 /*29*/ MatSetUp_MPIAIJ,
3132 #if defined(PETSC_HAVE_PBGL)
3133        0,
3134 #else
3135        0,
3136 #endif
3137        0,
3138        0,
3139        0,
3140 /*34*/ MatDuplicate_MPIAIJ,
3141        0,
3142        0,
3143        0,
3144        0,
3145 /*39*/ MatAXPY_MPIAIJ,
3146        MatGetSubMatrices_MPIAIJ,
3147        MatIncreaseOverlap_MPIAIJ,
3148        MatGetValues_MPIAIJ,
3149        MatCopy_MPIAIJ,
3150 /*44*/ MatGetRowMax_MPIAIJ,
3151        MatScale_MPIAIJ,
3152        0,
3153        0,
3154        MatZeroRowsColumns_MPIAIJ,
3155 /*49*/ MatSetRandom_MPIAIJ,
3156        0,
3157        0,
3158        0,
3159        0,
3160 /*54*/ MatFDColoringCreate_MPIAIJ,
3161        0,
3162        MatSetUnfactored_MPIAIJ,
3163        MatPermute_MPIAIJ,
3164        0,
3165 /*59*/ MatGetSubMatrix_MPIAIJ,
3166        MatDestroy_MPIAIJ,
3167        MatView_MPIAIJ,
3168        0,
3169        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3170 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3171        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3172        0,
3173        0,
3174        0,
3175 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3176        MatGetRowMinAbs_MPIAIJ,
3177        0,
3178        MatSetColoring_MPIAIJ,
3179        0,
3180        MatSetValuesAdifor_MPIAIJ,
3181 /*75*/ MatFDColoringApply_AIJ,
3182        0,
3183        0,
3184        0,
3185        MatFindZeroDiagonals_MPIAIJ,
3186 /*80*/ 0,
3187        0,
3188        0,
3189 /*83*/ MatLoad_MPIAIJ,
3190        0,
3191        0,
3192        0,
3193        0,
3194        0,
3195 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3196        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3197        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3198        MatPtAP_MPIAIJ_MPIAIJ,
3199        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3200 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3201        0,
3202        0,
3203        0,
3204        0,
3205 /*99*/ 0,
3206        0,
3207        0,
3208        MatConjugate_MPIAIJ,
3209        0,
3210 /*104*/MatSetValuesRow_MPIAIJ,
3211        MatRealPart_MPIAIJ,
3212        MatImaginaryPart_MPIAIJ,
3213        0,
3214        0,
3215 /*109*/0,
3216        MatGetRedundantMatrix_MPIAIJ,
3217        MatGetRowMin_MPIAIJ,
3218        0,
3219        0,
3220 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3221        0,
3222        0,
3223        0,
3224        0,
3225 /*119*/0,
3226        0,
3227        0,
3228        0,
3229        MatGetMultiProcBlock_MPIAIJ,
3230 /*124*/MatFindNonzeroRows_MPIAIJ,
3231        MatGetColumnNorms_MPIAIJ,
3232        MatInvertBlockDiagonal_MPIAIJ,
3233        0,
3234        MatGetSubMatricesParallel_MPIAIJ,
3235 /*129*/0,
3236        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3237        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3238        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3239        0,
3240 /*134*/0,
3241        0,
3242        0,
3243        0,
3244        0
3245 };
3246 
3247 /* ----------------------------------------------------------------------------------------*/
3248 
3249 EXTERN_C_BEGIN
3250 #undef __FUNCT__
3251 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3252 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3253 {
3254   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3255   PetscErrorCode ierr;
3256 
3257   PetscFunctionBegin;
3258   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3259   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3260   PetscFunctionReturn(0);
3261 }
3262 EXTERN_C_END
3263 
3264 EXTERN_C_BEGIN
3265 #undef __FUNCT__
3266 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3267 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3268 {
3269   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3270   PetscErrorCode ierr;
3271 
3272   PetscFunctionBegin;
3273   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3274   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3275   PetscFunctionReturn(0);
3276 }
3277 EXTERN_C_END
3278 
3279 EXTERN_C_BEGIN
3280 #undef __FUNCT__
3281 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
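/*
   An MPIAIJ matrix is stored as two SEQAIJ matrices: b->A, the square "diagonal" block
   (local rows by local columns), and b->B, the "off-diagonal" block (local rows by global
   columns).  d_nz/d_nnz preallocate b->A and o_nz/o_nnz preallocate b->B.
*/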
3282 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3283 {
3284   Mat_MPIAIJ     *b;
3285   PetscErrorCode ierr;
3286   PetscInt       i;
3287   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3288 
3289   PetscFunctionBegin;
3290   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3291   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3292   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3293   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3294   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3295   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3296 
3297   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3298   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3299   if (d_nnz) {
3300     for (i=0; i<B->rmap->n; i++) {
3301       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3302     }
3303   }
3304   if (o_nnz) {
3305     for (i=0; i<B->rmap->n; i++) {
3306       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3307     }
3308   }
3309   b = (Mat_MPIAIJ*)B->data;
3310 
3311   if (!B->preallocated) {
3312     /* Explicitly create 2 MATSEQAIJ matrices. */
3313     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3314     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3315     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3316     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3317     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3318     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3319     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3320     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3321     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3322     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3323   }
3324 
3325   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3326   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3327   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3328   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3329   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3330   B->preallocated = PETSC_TRUE;
3331   PetscFunctionReturn(0);
3332 }
3333 EXTERN_C_END
3334 
3335 #undef __FUNCT__
3336 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3337 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3338 {
3339   Mat            mat;
3340   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3341   PetscErrorCode ierr;
3342 
3343   PetscFunctionBegin;
3344   *newmat = 0;
3345   ierr    = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3346   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3347   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3348   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3349   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3350   a       = (Mat_MPIAIJ*)mat->data;
3351 
3352   mat->factortype   = matin->factortype;
3353   mat->rmap->bs     = matin->rmap->bs;
3354   mat->cmap->bs     = matin->cmap->bs;
3355   mat->assembled    = PETSC_TRUE;
3356   mat->insertmode   = NOT_SET_VALUES;
3357   mat->preallocated = PETSC_TRUE;
3358 
3359   a->size         = oldmat->size;
3360   a->rank         = oldmat->rank;
3361   a->donotstash   = oldmat->donotstash;
3362   a->roworiented  = oldmat->roworiented;
3363   a->rowindices   = 0;
3364   a->rowvalues    = 0;
3365   a->getrowactive = PETSC_FALSE;
3366 
3367   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3368   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3369 
3370   if (oldmat->colmap) {
3371 #if defined(PETSC_USE_CTABLE)
3372     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3373 #else
3374     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3375     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3376     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3377 #endif
3378   } else a->colmap = 0;
3379   if (oldmat->garray) {
3380     PetscInt len;
3381     len  = oldmat->B->cmap->n;
3382     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3383     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3384     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3385   } else a->garray = 0;
3386 
3387   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3388   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3389   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3390   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3391   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3392   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3393   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3394   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3395   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3396   *newmat = mat;
3397   PetscFunctionReturn(0);
3398 }
3399 
3400 
3401 
3402 #undef __FUNCT__
3403 #define __FUNCT__ "MatLoad_MPIAIJ"
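/*
   Rank 0 reads the PETSc binary file (header, row lengths, column indices, values) and ships
   each process its block of rows with MPIULong_Send/MPIULong_Recv; the row lengths are split
   into diagonal/off-diagonal counts to preallocate the matrix before the values are inserted
   with MatSetValues_MPIAIJ().
*/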
3404 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3405 {
3406   PetscScalar    *vals,*svals;
3407   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3408   PetscErrorCode ierr;
3409   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3410   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3411   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3412   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3413   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3414   int            fd;
3415   PetscInt       bs = 1;
3416 
3417   PetscFunctionBegin;
3418   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3419   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3420   if (!rank) {
3421     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3422     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3423     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3424   }
3425 
3426   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3427   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3428   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3429 
3430   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3431 
3432   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3433   M    = header[1]; N = header[2];
3434   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3435   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3436   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3437 
3438   /* If global sizes are set, check if they are consistent with that given in the file */
3439   if (sizesset) {
3440     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3441   }
3442   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3443   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3444 
3445   /* determine ownership of all (block) rows */
3446   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3447   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3448   else m = newMat->rmap->n; /* Set by user */
3449 
3450   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3451   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3452 
3453   /* First process needs enough room for process with most rows */
3454   if (!rank) {
3455     mmax = rowners[1];
3456     for (i=2; i<=size; i++) {
3457       mmax = PetscMax(mmax, rowners[i]);
3458     }
3459   } else mmax = m;
3460 
3461   rowners[0] = 0;
3462   for (i=2; i<=size; i++) {
3463     rowners[i] += rowners[i-1];
3464   }
3465   rstart = rowners[rank];
3466   rend   = rowners[rank+1];
3467 
3468   /* distribute row lengths to all processors */
3469   ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3470   if (!rank) {
3471     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3472     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3473     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3474     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3475     for (j=0; j<m; j++) {
3476       procsnz[0] += ourlens[j];
3477     }
3478     for (i=1; i<size; i++) {
3479       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3480       /* calculate the number of nonzeros on each processor */
3481       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3482         procsnz[i] += rowlengths[j];
3483       }
3484       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3485     }
3486     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3487   } else {
3488     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3489   }
3490 
3491   if (!rank) {
3492     /* determine max buffer needed and allocate it */
3493     maxnz = 0;
3494     for (i=0; i<size; i++) {
3495       maxnz = PetscMax(maxnz,procsnz[i]);
3496     }
3497     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3498 
3499     /* read in my part of the matrix column indices  */
3500     nz   = procsnz[0];
3501     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3502     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3503 
3504     /* read in everyone else's and ship off */
3505     for (i=1; i<size; i++) {
3506       nz   = procsnz[i];
3507       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3508       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3509     }
3510     ierr = PetscFree(cols);CHKERRQ(ierr);
3511   } else {
3512     /* determine buffer space needed for message */
3513     nz = 0;
3514     for (i=0; i<m; i++) {
3515       nz += ourlens[i];
3516     }
3517     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3518 
3519     /* receive message of column indices*/
3520     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3521   }
3522 
3523   /* determine column ownership if matrix is not square */
3524   if (N != M) {
3525     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3526     else n = newMat->cmap->n;
3527     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3528     cstart = cend - n;
3529   } else {
3530     cstart = rstart;
3531     cend   = rend;
3532     n      = cend - cstart;
3533   }
3534 
3535   /* loop over local rows, determining number of off diagonal entries */
3536   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3537   jj   = 0;
3538   for (i=0; i<m; i++) {
3539     for (j=0; j<ourlens[i]; j++) {
3540       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3541       jj++;
3542     }
3543   }
3544 
3545   for (i=0; i<m; i++) {
3546     ourlens[i] -= offlens[i];
3547   }
3548   if (!sizesset) {
3549     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3550   }
3551 
3552   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3553 
3554   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3555 
3556   for (i=0; i<m; i++) {
3557     ourlens[i] += offlens[i];
3558   }
3559 
3560   if (!rank) {
3561     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3562 
3563     /* read in my part of the matrix numerical values  */
3564     nz   = procsnz[0];
3565     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3566 
3567     /* insert into matrix */
3568     jj      = rstart;
3569     smycols = mycols;
3570     svals   = vals;
3571     for (i=0; i<m; i++) {
3572       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3573       smycols += ourlens[i];
3574       svals   += ourlens[i];
3575       jj++;
3576     }
3577 
3578     /* read in other processors and ship out */
3579     for (i=1; i<size; i++) {
3580       nz   = procsnz[i];
3581       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3582       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3583     }
3584     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3585   } else {
3586     /* receive numeric values */
3587     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3588 
3589     /* receive message of values*/
3590     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3591 
3592     /* insert into matrix */
3593     jj      = rstart;
3594     smycols = mycols;
3595     svals   = vals;
3596     for (i=0; i<m; i++) {
3597       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3598       smycols += ourlens[i];
3599       svals   += ourlens[i];
3600       jj++;
3601     }
3602   }
3603   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3604   ierr = PetscFree(vals);CHKERRQ(ierr);
3605   ierr = PetscFree(mycols);CHKERRQ(ierr);
3606   ierr = PetscFree(rowners);CHKERRQ(ierr);
3607   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3608   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3609   PetscFunctionReturn(0);
3610 }
3611 
3612 #undef __FUNCT__
3613 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
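/*
   Gathers the (parallel) column index set onto every process with ISAllGather(), then calls
   MatGetSubMatrix_MPIAIJ_Private(); for MAT_INITIAL_MATRIX the gathered IS is composed on the
   new matrix under the name "ISAllGather" so it can be reused on a later MAT_REUSE_MATRIX call.
*/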
3614 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3615 {
3616   PetscErrorCode ierr;
3617   IS             iscol_local;
3618   PetscInt       csize;
3619 
3620   PetscFunctionBegin;
3621   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3622   if (call == MAT_REUSE_MATRIX) {
3623     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3624     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3625   } else {
3626     PetscInt cbs;
3627     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3628     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3629     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3630   }
3631   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3632   if (call == MAT_INITIAL_MATRIX) {
3633     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3634     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3635   }
3636   PetscFunctionReturn(0);
3637 }
3638 
3639 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3640 #undef __FUNCT__
3641 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3642 /*
3643     Not great since it makes two copies of the submatrix: first a SeqAIJ on each process,
3644   and then the final result obtained by concatenating the local matrices.
3645   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3646 
3647   Note: This requires a sequential iscol with all indices.
3648 */
3649 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3650 {
3651   PetscErrorCode ierr;
3652   PetscMPIInt    rank,size;
3653   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3654   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3655   PetscBool      allcolumns, colflag;
3656   Mat            M,Mreuse;
3657   MatScalar      *vwork,*aa;
3658   MPI_Comm       comm = ((PetscObject)mat)->comm;
3659   Mat_SeqAIJ     *aij;
3660 
3661 
3662   PetscFunctionBegin;
3663   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3664   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3665 
3666   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3667   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3668   if (colflag && ncol == mat->cmap->N) {
3669     allcolumns = PETSC_TRUE;
3670   } else {
3671     allcolumns = PETSC_FALSE;
3672   }
3673   if (call ==  MAT_REUSE_MATRIX) {
3674     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3675     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3676     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3677   } else {
3678     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3679   }
3680 
3681   /*
3682       m - number of local rows
3683       n - number of columns (same on all processors)
3684       rstart - first row in new global matrix generated
3685   */
3686   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3687   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3688   if (call == MAT_INITIAL_MATRIX) {
3689     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3690     ii  = aij->i;
3691     jj  = aij->j;
3692 
3693     /*
3694         Determine the number of non-zeros in the diagonal and off-diagonal
3695         portions of the matrix in order to do correct preallocation
3696     */
3697 
3698     /* first get start and end of "diagonal" columns */
3699     if (csize == PETSC_DECIDE) {
3700       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3701       if (mglobal == n) { /* square matrix */
3702         nlocal = m;
3703       } else {
3704         nlocal = n/size + ((n % size) > rank);
3705       }
3706     } else {
3707       nlocal = csize;
3708     }
3709     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3710     rstart = rend - nlocal;
3711     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3712 
3713     /* next, compute all the lengths */
3714     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3715     olens = dlens + m;
3716     for (i=0; i<m; i++) {
3717       jend = ii[i+1] - ii[i];
3718       olen = 0;
3719       dlen = 0;
3720       for (j=0; j<jend; j++) {
3721         if (*jj < rstart || *jj >= rend) olen++;
3722         else dlen++;
3723         jj++;
3724       }
3725       olens[i] = olen;
3726       dlens[i] = dlen;
3727     }
3728     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3729     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3730     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3731     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3732     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3733     ierr = PetscFree(dlens);CHKERRQ(ierr);
3734   } else {
3735     PetscInt ml,nl;
3736 
3737     M    = *newmat;
3738     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3739     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3740     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3741     /*
3742          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3743        rather than the slower MatSetValues().
3744     */
3745     M->was_assembled = PETSC_TRUE;
3746     M->assembled     = PETSC_FALSE;
3747   }
3748   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3749   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3750   ii   = aij->i;
3751   jj   = aij->j;
3752   aa   = aij->a;
3753   for (i=0; i<m; i++) {
3754     row   = rstart + i;
3755     nz    = ii[i+1] - ii[i];
3756     cwork = jj;     jj += nz;
3757     vwork = aa;     aa += nz;
3758     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3759   }
3760 
3761   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3762   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3763   *newmat = M;
3764 
3765   /* save submatrix used in processor for next request */
3766   if (call ==  MAT_INITIAL_MATRIX) {
3767     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3768     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3769   }
3770   PetscFunctionReturn(0);
3771 }
3772 
3773 EXTERN_C_BEGIN
3774 #undef __FUNCT__
3775 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3776 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3777 {
3778   PetscInt       m,cstart, cend,j,nnz,i,d;
3779   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3780   const PetscInt *JJ;
3781   PetscScalar    *values;
3782   PetscErrorCode ierr;
3783 
3784   PetscFunctionBegin;
3785   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3786 
3787   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3788   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3789   m      = B->rmap->n;
3790   cstart = B->cmap->rstart;
3791   cend   = B->cmap->rend;
3792   rstart = B->rmap->rstart;
3793 
3794   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3795 
3796 #if defined(PETSC_USE_DEBUG)
3797   for (i=0; i<m; i++) {
3798     nnz = Ii[i+1]- Ii[i];
3799     JJ  = J + Ii[i];
3800     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3801     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3802     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3803   }
3804 #endif
3805 
3806   for (i=0; i<m; i++) {
3807     nnz     = Ii[i+1]- Ii[i];
3808     JJ      = J + Ii[i];
3809     nnz_max = PetscMax(nnz_max,nnz);
3810     d       = 0;
3811     for (j=0; j<nnz; j++) {
3812       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3813     }
3814     d_nnz[i] = d;
3815     o_nnz[i] = nnz - d;
3816   }
3817   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3818   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3819 
3820   if (v) values = (PetscScalar*)v;
3821   else {
3822     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3823     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3824   }
3825 
3826   for (i=0; i<m; i++) {
3827     ii   = i + rstart;
3828     nnz  = Ii[i+1]- Ii[i];
3829     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3830   }
3831   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3832   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3833 
3834   if (!v) {
3835     ierr = PetscFree(values);CHKERRQ(ierr);
3836   }
3837   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3838   PetscFunctionReturn(0);
3839 }
3840 EXTERN_C_END
3841 
3842 #undef __FUNCT__
3843 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3844 /*@
3845    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3846    (the default parallel PETSc format).
3847 
3848    Collective on MPI_Comm
3849 
3850    Input Parameters:
3851 +  B - the matrix
3852 .  i - the indices into j for the start of each local row (starts with zero)
3853 .  j - the column indices for each local row (starts with zero)
3854 -  v - optional values in the matrix
3855 
3856    Level: developer
3857 
3858    Notes:
3859        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3860      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3861      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3862 
3863        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3864 
3865        The format used for the sparse matrix input is equivalent to a
3866     row-major ordering, i.e. for the following matrix, the expected input data is
3867     as shown below:
3868 
3869         1 0 0
3870         2 0 3     P0
3871        -------
3872         4 5 6     P1
3873 
3874      Process0 [P0]: rows_owned=[0,1]
3875         i =  {0,1,3}  [size = nrow+1  = 2+1]
3876         i =  {0,1,3}  [size = nrow+1  = 2+1]
3877         j =  {0,0,2}  [size = nz = 3]
3878         v =  {1,2,3}  [size = nz = 3]
3878 
3879      Process1 [P1]: rows_owned=[2]
3880         i =  {0,3}    [size = nrow+1  = 1+1]
3881         j =  {0,1,2}  [size = nz = 3]
3882         v =  {4,5,6}  [size = nz = 3]
3883 
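     A minimal calling sketch for P0, assuming the layout above (the names A, nlocalrows and
     nlocalcols are illustrative and not part of this routine's interface):

.vb
     PetscInt    i[] = {0,1,3};        /* row pointers for the 2 local rows        */
     PetscInt    j[] = {0,0,2};        /* global column indices of the 3 local nz  */
     PetscScalar v[] = {1,2,3};        /* values in the same order as j            */

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,nlocalrows,nlocalcols,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(A,i,j,v);CHKERRQ(ierr);
.ve
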
3884 .keywords: matrix, aij, compressed row, sparse, parallel
3885 
3886 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3887           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3888 @*/
3889 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3890 {
3891   PetscErrorCode ierr;
3892 
3893   PetscFunctionBegin;
3894   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3895   PetscFunctionReturn(0);
3896 }
3897 
3898 #undef __FUNCT__
3899 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3900 /*@C
3901    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3902    (the default parallel PETSc format).  For good matrix assembly performance
3903    the user should preallocate the matrix storage by setting the parameters
3904    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3905    performance can be increased by more than a factor of 50.
3906 
3907    Collective on MPI_Comm
3908 
3909    Input Parameters:
3910 +  A - the matrix
3911 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3912            (same value is used for all local rows)
3913 .  d_nnz - array containing the number of nonzeros in the various rows of the
3914            DIAGONAL portion of the local submatrix (possibly different for each row)
3915            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3916            The size of this array is equal to the number of local rows, i.e 'm'.
3917            For matrices that will be factored, you must leave room for (and set)
3918            the diagonal entry even if it is zero.
3919 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3920            submatrix (same value is used for all local rows).
3921 -  o_nnz - array containing the number of nonzeros in the various rows of the
3922            OFF-DIAGONAL portion of the local submatrix (possibly different for
3923            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3924            structure. The size of this array is equal to the number
3925            of local rows, i.e 'm'.
3926 
3927    If the *_nnz parameter is given then the *_nz parameter is ignored
3928 
3929    The AIJ format (also called the Yale sparse matrix format or
3930    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3931    storage.  The stored row and column indices begin with zero.
3932    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3933 
3934    The parallel matrix is partitioned such that the first m0 rows belong to
3935    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3936    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
3937 
3938    The DIAGONAL portion of the local submatrix of a processor can be defined
3939    as the submatrix which is obtained by extracting the part corresponding to
3940    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3941    first row that belongs to the processor, r2 is the last row belonging to
3942    this processor, and c1-c2 is the range of indices of the local part of a
3943    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3944    common case of a square matrix, the row and column ranges are the same and
3945    the DIAGONAL part is also square. The remaining portion of the local
3946    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3947 
3948    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3949 
3950    You can call MatGetInfo() to get information on how effective the preallocation was;
3951    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3952    You can also run with the option -info and look for messages containing the string
3953    malloc to see if additional memory allocation was needed.
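
   For example, a minimal sketch of such a check (the variable names are illustrative; A is the
   assembled matrix and MAT_LOCAL restricts the query to this process) might be:

.vb
     MatInfo info;

     ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_allocated %g nz_used %g nz_unneeded %g\n",
                        info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);CHKERRQ(ierr);
.ve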
3954 
3955    Example usage:
3956 
3957    Consider the following 8x8 matrix with 34 non-zero values, that is
3958    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3959    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3960    as follows:
3961 
3962 .vb
3963             1  2  0  |  0  3  0  |  0  4
3964     Proc0   0  5  6  |  7  0  0  |  8  0
3965             9  0 10  | 11  0  0  | 12  0
3966     -------------------------------------
3967            13  0 14  | 15 16 17  |  0  0
3968     Proc1   0 18  0  | 19 20 21  |  0  0
3969             0  0  0  | 22 23  0  | 24  0
3970     -------------------------------------
3971     Proc2  25 26 27  |  0  0 28  | 29  0
3972            30  0  0  | 31 32 33  |  0 34
3973 .ve
3974 
3975    This can be represented as a collection of submatrices as:
3976 
3977 .vb
3978       A B C
3979       D E F
3980       G H I
3981 .ve
3982 
3983    Where the submatrices A,B,C are owned by proc0, D,E,F are
3984    owned by proc1, G,H,I are owned by proc2.
3985 
3986    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3987    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3988    The 'M','N' parameters are 8,8, and have the same values on all procs.
3989 
3990    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3991    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3992    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3993    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3994    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3995    matrix, and [DF] as another SeqAIJ matrix.
3996 
3997    When d_nz, o_nz parameters are specified, d_nz storage elements are
3998    allocated for every row of the local diagonal submatrix, and o_nz
3999    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4000    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4001    local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4002    In this case, the values of d_nz,o_nz are:
4003 .vb
4004      proc0 : d_nz = 2, o_nz = 2
4005      proc1 : d_nz = 3, o_nz = 2
4006      proc2 : d_nz = 1, o_nz = 4
4007 .ve
4008    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4009    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4010    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4011    34 values.
4012 
4013    When d_nnz, o_nnz parameters are specified, the storage is specified
4014    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4015    In the above case the values for d_nnz,o_nnz are:
4016 .vb
4017      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4018      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4019      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4020 .ve
4021    Here the space allocated is the sum of all the above values, i.e., 34, and
4022    hence pre-allocation is perfect.
4023 
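   A minimal calling sketch for proc0 of the example above (comm, A, and ierr are assumed to be
   declared by the caller; the other processes would use their own d_nnz/o_nnz arrays) might be:

.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
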
4024    Level: intermediate
4025 
4026 .keywords: matrix, aij, compressed row, sparse, parallel
4027 
4028 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4029           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4030 @*/
4031 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4032 {
4033   PetscErrorCode ierr;
4034 
4035   PetscFunctionBegin;
4036   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4037   PetscValidType(B,1);
4038   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4039   PetscFunctionReturn(0);
4040 }
4041 
4042 #undef __FUNCT__
4043 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4044 /*@
4045      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4046          in standard CSR format.
4047 
4048    Collective on MPI_Comm
4049 
4050    Input Parameters:
4051 +  comm - MPI communicator
4052 .  m - number of local rows (Cannot be PETSC_DECIDE)
4053 .  n - This value should be the same as the local size used in creating the
4054        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4055        calculated if N is given).  For square matrices n is almost always m.
4056 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4057 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4058 .   i - row indices
4059 .   j - column indices
4060 -   a - matrix values
4061 
4062    Output Parameter:
4063 .   mat - the matrix
4064 
4065    Level: intermediate
4066 
4067    Notes:
4068        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4069      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4070      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4071 
4072        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4073        The i and j indices are 0-based; the entries of i are offsets into the local j (and a) arrays.
4074        The format which is used for the sparse matrix input, is equivalent to a
4075        The format used for the sparse matrix input is equivalent to a
4076     row-major ordering, i.e., for the following matrix the input data expected is
4077 
4078         1 0 0
4079         2 0 3     P0
4080        -------
4081         4 5 6     P1
4082 
4083      Process0 [P0]: rows_owned=[0,1]
4084         i =  {0,1,3}  [size = nrow+1  = 2+1]
4085         j =  {0,0,2}  [size = nz = 3]
4086         v =  {1,2,3}  [size = nz = 3]
4087 
4088      Process1 [P1]: rows_owned=[2]
4089         i =  {0,3}    [size = nrow+1  = 1+1]
4090         j =  {0,1,2}  [size = nz = 3]
4091         v =  {4,5,6}  [size = nz = 3]
4092 
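   A minimal calling sketch for P0 of this example (comm and ierr are assumed to be declared by
   the caller; n is left as PETSC_DECIDE so the local column size is chosen by PETSc) might be:

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;

     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve
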
4093 .keywords: matrix, aij, compressed row, sparse, parallel
4094 
4095 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4096           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4097 @*/
4098 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4099 {
4100   PetscErrorCode ierr;
4101 
4102   PetscFunctionBegin;
4103   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4104   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4105   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4106   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4107   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4108   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4109   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4110   PetscFunctionReturn(0);
4111 }
4112 
4113 #undef __FUNCT__
4114 #define __FUNCT__ "MatCreateAIJ"
4115 /*@C
4116    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4117    (the default parallel PETSc format).  For good matrix assembly performance
4118    the user should preallocate the matrix storage by setting the parameters
4119    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4120    performance can be increased by more than a factor of 50.
4121 
4122    Collective on MPI_Comm
4123 
4124    Input Parameters:
4125 +  comm - MPI communicator
4126 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4127            This value should be the same as the local size used in creating the
4128            y vector for the matrix-vector product y = Ax.
4129 .  n - This value should be the same as the local size used in creating the
4130        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4131        calculated if N is given).  For square matrices n is almost always m.
4132 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4133 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4134 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4135            (same value is used for all local rows)
4136 .  d_nnz - array containing the number of nonzeros in the various rows of the
4137            DIAGONAL portion of the local submatrix (possibly different for each row)
4138            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4139            The size of this array is equal to the number of local rows, i.e 'm'.
4140 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4141            submatrix (same value is used for all local rows).
4142 -  o_nnz - array containing the number of nonzeros in the various rows of the
4143            OFF-DIAGONAL portion of the local submatrix (possibly different for
4144            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4145            structure. The size of this array is equal to the number
4146            of local rows, i.e 'm'.
4147 
4148    Output Parameter:
4149 .  A - the matrix
4150 
4151    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4152    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4153    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
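
   A sketch of that recommended sequence (comm, the sizes, and the preallocation parameters are
   assumed to be set up by the caller; calling both preallocation routines lets the same code run
   on one or many processes) might be:

.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,d_nnz);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve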
4154 
4155    Notes:
4156    If the *_nnz parameter is given then the *_nz parameter is ignored
4157 
4158    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4159    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4160    storage requirements for this matrix.
4161 
4162    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4163    processor then it must be used on all processors that share the object for
4164    that argument.
4165 
4166    The user MUST specify either the local or global matrix dimensions
4167    (possibly both).
4168 
4169    The parallel matrix is partitioned across processors such that the
4170    first m0 rows belong to process 0, the next m1 rows belong to
4171    process 1, the next m2 rows belong to process 2, etc., where
4172    m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
4173    values corresponding to an [m x N] submatrix.
4174 
4175    The columns are logically partitioned with the n0 columns belonging
4176    to the 0th partition, the next n1 columns belonging to the next
4177    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
4178 
4179    The DIAGONAL portion of the local submatrix on any given processor
4180    is the submatrix formed by the m rows and the corresponding n columns
4181    owned by the given processor, i.e., the diagonal matrix on
4182    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4183    etc. The remaining portion of the local submatrix [m x (N-n)]
4184    constitutes the OFF-DIAGONAL portion. The example below better
4185    illustrates this concept.
4186 
4187    For a square global matrix we define each processor's diagonal portion
4188    to be its local rows and the corresponding columns (a square submatrix);
4189    each processor's off-diagonal portion encompasses the remainder of the
4190    local matrix (a rectangular submatrix).
4191 
4192    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4193 
4194    When calling this routine with a single process communicator, a matrix of
4195    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4196    type of communicator, use the construction mechanism:
4197      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4198 
4199    By default, this format uses inodes (identical nodes) when possible.
4200    We search for consecutive rows with the same nonzero structure, thereby
4201    reusing matrix information to achieve increased efficiency.
4202 
4203    Options Database Keys:
4204 +  -mat_no_inode  - Do not use inodes
4205 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4206 -  -mat_aij_oneindex - Internally use indexing starting at 1
4207         rather than 0.  Note that when calling MatSetValues(),
4208         the user still MUST index entries starting at 0!
4209 
4210 
4211    Example usage:
4212 
4213    Consider the following 8x8 matrix with 34 non-zero values, that is
4214    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4215    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4216    as follows:
4217 
4218 .vb
4219             1  2  0  |  0  3  0  |  0  4
4220     Proc0   0  5  6  |  7  0  0  |  8  0
4221             9  0 10  | 11  0  0  | 12  0
4222     -------------------------------------
4223            13  0 14  | 15 16 17  |  0  0
4224     Proc1   0 18  0  | 19 20 21  |  0  0
4225             0  0  0  | 22 23  0  | 24  0
4226     -------------------------------------
4227     Proc2  25 26 27  |  0  0 28  | 29  0
4228            30  0  0  | 31 32 33  |  0 34
4229 .ve
4230 
4231    This can be represented as a collection of submatrices as:
4232 
4233 .vb
4234       A B C
4235       D E F
4236       G H I
4237 .ve
4238 
4239    Where the submatrices A,B,C are owned by proc0, D,E,F are
4240    owned by proc1, G,H,I are owned by proc2.
4241 
4242    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4243    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4244    The 'M','N' parameters are 8,8, and have the same values on all procs.
4245 
4246    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4247    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4248    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4249    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4250    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4251    matrix, and [DF] as another SeqAIJ matrix.
4252 
4253    When d_nz, o_nz parameters are specified, d_nz storage elements are
4254    allocated for every row of the local diagonal submatrix, and o_nz
4255    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4256    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4257    local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4258    In this case, the values of d_nz,o_nz are:
4259 .vb
4260      proc0 : d_nz = 2, o_nz = 2
4261      proc1 : d_nz = 3, o_nz = 2
4262      proc2 : d_nz = 1, o_nz = 4
4263 .ve
4264    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4265    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4266    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4267    34 values.
4268 
4269    When d_nnz, o_nnz parameters are specified, the storage is specified
4270    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4271    In the above case the values for d_nnz,o_nnz are:
4272 .vb
4273      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4274      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4275      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4276 .ve
4277    Here the space allocated is the sum of all the above values, i.e., 34, and
4278    hence pre-allocation is perfect.
4279 
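   As a direct-call sketch, proc0 of the example above (comm and ierr assumed declared) could
   create the matrix with:

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
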
4280    Level: intermediate
4281 
4282 .keywords: matrix, aij, compressed row, sparse, parallel
4283 
4284 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4285           MPIAIJ, MatCreateMPIAIJWithArrays()
4286 @*/
4287 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4288 {
4289   PetscErrorCode ierr;
4290   PetscMPIInt    size;
4291 
4292   PetscFunctionBegin;
4293   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4294   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4295   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4296   if (size > 1) {
4297     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4298     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4299   } else {
4300     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4301     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4302   }
4303   PetscFunctionReturn(0);
4304 }
4305 
4306 #undef __FUNCT__
4307 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
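/*
   MatMPIAIJGetSeqAIJ - returns the pieces stored by an MPIAIJ matrix: Ad is the local DIAGONAL
   SeqAIJ block, Ao is the local OFF-DIAGONAL SeqAIJ block, and colmap maps the local column
   indices of Ao to global column indices.
*/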
4308 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4309 {
4310   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4311 
4312   PetscFunctionBegin;
4313   *Ad     = a->A;
4314   *Ao     = a->B;
4315   *colmap = a->garray;
4316   PetscFunctionReturn(0);
4317 }
4318 
4319 #undef __FUNCT__
4320 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4321 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4322 {
4323   PetscErrorCode ierr;
4324   PetscInt       i;
4325   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4326 
4327   PetscFunctionBegin;
4328   if (coloring->ctype == IS_COLORING_GLOBAL) {
4329     ISColoringValue *allcolors,*colors;
4330     ISColoring      ocoloring;
4331 
4332     /* set coloring for diagonal portion */
4333     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4334 
4335     /* set coloring for off-diagonal portion */
4336     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4337     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4338     for (i=0; i<a->B->cmap->n; i++) {
4339       colors[i] = allcolors[a->garray[i]];
4340     }
4341     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4342     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4343     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4344     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4345   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4346     ISColoringValue *colors;
4347     PetscInt        *larray;
4348     ISColoring      ocoloring;
4349 
4350     /* set coloring for diagonal portion */
4351     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4352     for (i=0; i<a->A->cmap->n; i++) {
4353       larray[i] = i + A->cmap->rstart;
4354     }
4355     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4356     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4357     for (i=0; i<a->A->cmap->n; i++) {
4358       colors[i] = coloring->colors[larray[i]];
4359     }
4360     ierr = PetscFree(larray);CHKERRQ(ierr);
4361     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4362     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4363     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4364 
4365     /* set coloring for off-diagonal portion */
4366     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4367     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4368     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4369     for (i=0; i<a->B->cmap->n; i++) {
4370       colors[i] = coloring->colors[larray[i]];
4371     }
4372     ierr = PetscFree(larray);CHKERRQ(ierr);
4373     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4374     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4375     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4376   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4377   PetscFunctionReturn(0);
4378 }
4379 
4380 #undef __FUNCT__
4381 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4382 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4383 {
4384   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4385   PetscErrorCode ierr;
4386 
4387   PetscFunctionBegin;
4388   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4389   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4390   PetscFunctionReturn(0);
4391 }
4392 
4393 #undef __FUNCT__
4394 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4395 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4396 {
4397   PetscErrorCode ierr;
4398   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4399   PetscInt       *indx;
4400 
4401   PetscFunctionBegin;
4402   /* This routine will ONLY return MPIAIJ type matrix */
4403   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4404   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4405   if (n == PETSC_DECIDE) {
4406     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4407   }
4408   /* Check sum(n) = N */
4409   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4410   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4411 
4412   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4413   rstart -= m;
4414 
4415   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4416   for (i=0; i<m; i++) {
4417     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4418     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4419     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4420   }
4421 
4422   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4423   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4424   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4425   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4426   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4427   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4428   PetscFunctionReturn(0);
4429 }
4430 
4431 #undef __FUNCT__
4432 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4433 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4434 {
4435   PetscErrorCode ierr;
4436   PetscInt       m,N,i,rstart,nnz,Ii;
4437   PetscInt       *indx;
4438   PetscScalar    *values;
4439 
4440   PetscFunctionBegin;
4441   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4442   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4443   for (i=0; i<m; i++) {
4444     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4445     Ii   = i + rstart;
4446     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4447     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4448   }
4449   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4450   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4451   PetscFunctionReturn(0);
4452 }
4453 
4454 #undef __FUNCT__
4455 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4456 /*@
4457       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4458                  matrices from each processor
4459 
4460     Collective on MPI_Comm
4461 
4462    Input Parameters:
4463 +    comm - the communicator that the parallel matrix will live on
4464 .    inmat - the input sequential matrices
4465 .    n - number of local columns (or PETSC_DECIDE)
4466 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4467 
4468    Output Parameter:
4469 .    outmat - the parallel matrix generated
4470 
4471     Level: advanced
4472 
4473    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4474 
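   For illustration, a minimal calling sketch (Aseq is a hypothetical SeqAIJ matrix held by each
   process, all with the same number of columns, and ierr is assumed declared) might be:

.vb
     Mat Apar;

     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);CHKERRQ(ierr);
.ve
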
4475 @*/
4476 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4477 {
4478   PetscErrorCode ierr;
4479 
4480   PetscFunctionBegin;
4481   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4482   if (scall == MAT_INITIAL_MATRIX) {
4483     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4484   }
4485   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4486   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4487   PetscFunctionReturn(0);
4488 }
4489 
4490 #undef __FUNCT__
4491 #define __FUNCT__ "MatFileSplit"
4492 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4493 {
4494   PetscErrorCode    ierr;
4495   PetscMPIInt       rank;
4496   PetscInt          m,N,i,rstart,nnz;
4497   size_t            len;
4498   const PetscInt    *indx;
4499   PetscViewer       out;
4500   char              *name;
4501   Mat               B;
4502   const PetscScalar *values;
4503 
4504   PetscFunctionBegin;
4505   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4506   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4507   /* Should this be the type of the diagonal block of A? */
4508   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4509   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4510   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4511   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4512   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4513   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4514   for (i=0; i<m; i++) {
4515     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4516     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4517     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4518   }
4519   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4520   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4521 
4522   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4523   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4524   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4525   sprintf(name,"%s.%d",outfile,rank);
4526   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4527   ierr = PetscFree(name);CHKERRQ(ierr);
4528   ierr = MatView(B,out);CHKERRQ(ierr);
4529   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4530   ierr = MatDestroy(&B);CHKERRQ(ierr);
4531   PetscFunctionReturn(0);
4532 }
4533 
4534 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4535 #undef __FUNCT__
4536 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4537 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4538 {
4539   PetscErrorCode      ierr;
4540   Mat_Merge_SeqsToMPI *merge;
4541   PetscContainer      container;
4542 
4543   PetscFunctionBegin;
4544   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4545   if (container) {
4546     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4547     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4548     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4549     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4550     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4551     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4552     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4553     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4554     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4555     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4556     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4557     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4558     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4559     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4560     ierr = PetscFree(merge);CHKERRQ(ierr);
4561     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4562   }
4563   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4564   PetscFunctionReturn(0);
4565 }
4566 
4567 #include <../src/mat/utils/freespace.h>
4568 #include <petscbt.h>
4569 
4570 #undef __FUNCT__
4571 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4572 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4573 {
4574   PetscErrorCode      ierr;
4575   MPI_Comm            comm=((PetscObject)mpimat)->comm;
4576   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4577   PetscMPIInt         size,rank,taga,*len_s;
4578   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4579   PetscInt            proc,m;
4580   PetscInt            **buf_ri,**buf_rj;
4581   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4582   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4583   MPI_Request         *s_waits,*r_waits;
4584   MPI_Status          *status;
4585   MatScalar           *aa=a->a;
4586   MatScalar           **abuf_r,*ba_i;
4587   Mat_Merge_SeqsToMPI *merge;
4588   PetscContainer      container;
4589 
4590   PetscFunctionBegin;
4591   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4592 
4593   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4594   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4595 
4596   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4597   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4598 
4599   bi     = merge->bi;
4600   bj     = merge->bj;
4601   buf_ri = merge->buf_ri;
4602   buf_rj = merge->buf_rj;
4603 
4604   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4605   owners = merge->rowmap->range;
4606   len_s  = merge->len_s;
4607 
4608   /* send and recv matrix values */
4609   /*-----------------------------*/
4610   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4611   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4612 
4613   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4614   for (proc=0,k=0; proc<size; proc++) {
4615     if (!len_s[proc]) continue;
4616     i    = owners[proc];
4617     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4618     k++;
4619   }
4620 
4621   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4622   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4623   ierr = PetscFree(status);CHKERRQ(ierr);
4624 
4625   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4626   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4627 
4628   /* insert mat values of mpimat */
4629   /*----------------------------*/
4630   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4631   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4632 
4633   for (k=0; k<merge->nrecv; k++) {
4634     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4635     nrows       = *(buf_ri_k[k]);
4636     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4637     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4638   }
4639 
4640   /* set values of ba */
4641   m = merge->rowmap->n;
4642   for (i=0; i<m; i++) {
4643     arow = owners[rank] + i;
4644     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4645     bnzi = bi[i+1] - bi[i];
4646     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4647 
4648     /* add local non-zero vals of this proc's seqmat into ba */
4649     anzi   = ai[arow+1] - ai[arow];
4650     aj     = a->j + ai[arow];
4651     aa     = a->a + ai[arow];
4652     nextaj = 0;
4653     for (j=0; nextaj<anzi; j++) {
4654       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4655         ba_i[j] += aa[nextaj++];
4656       }
4657     }
4658 
4659     /* add received vals into ba */
4660     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4661       /* i-th row */
4662       if (i == *nextrow[k]) {
4663         anzi   = *(nextai[k]+1) - *nextai[k];
4664         aj     = buf_rj[k] + *(nextai[k]);
4665         aa     = abuf_r[k] + *(nextai[k]);
4666         nextaj = 0;
4667         for (j=0; nextaj<anzi; j++) {
4668           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4669             ba_i[j] += aa[nextaj++];
4670           }
4671         }
4672         nextrow[k]++; nextai[k]++;
4673       }
4674     }
4675     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4676   }
4677   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4678   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4679 
4680   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4681   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4682   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4683   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4684   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4685   PetscFunctionReturn(0);
4686 }
4687 
4688 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4689 
4690 #undef __FUNCT__
4691 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4692 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4693 {
4694   PetscErrorCode      ierr;
4695   Mat                 B_mpi;
4696   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4697   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4698   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4699   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4700   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4701   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4702   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4703   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4704   MPI_Status          *status;
4705   PetscFreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
4706   PetscBT             lnkbt;
4707   Mat_Merge_SeqsToMPI *merge;
4708   PetscContainer      container;
4709 
4710   PetscFunctionBegin;
4711   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4712 
4713   /* make sure it is a PETSc comm */
4714   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4715   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4716   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4717 
4718   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4719   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4720 
4721   /* determine row ownership */
4722   /*---------------------------------------------------------*/
4723   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4724   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4725   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4726   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4727   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4728   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4729   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4730 
4731   m      = merge->rowmap->n;
4732   owners = merge->rowmap->range;
4733 
4734   /* determine the number of messages to send, their lengths */
4735   /*---------------------------------------------------------*/
4736   len_s = merge->len_s;
4737 
4738   len          = 0; /* length of buf_si[] */
4739   merge->nsend = 0;
4740   for (proc=0; proc<size; proc++) {
4741     len_si[proc] = 0;
4742     if (proc == rank) {
4743       len_s[proc] = 0;
4744     } else {
4745       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4746       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4747     }
4748     if (len_s[proc]) {
4749       merge->nsend++;
4750       nrows = 0;
4751       for (i=owners[proc]; i<owners[proc+1]; i++) {
4752         if (ai[i+1] > ai[i]) nrows++;
4753       }
4754       len_si[proc] = 2*(nrows+1);
4755       len         += len_si[proc];
4756     }
4757   }
4758 
4759   /* determine the number and length of messages to receive for ij-structure */
4760   /*-------------------------------------------------------------------------*/
4761   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4762   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4763 
4764   /* post the Irecv of j-structure */
4765   /*-------------------------------*/
4766   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4767   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4768 
4769   /* post the Isend of j-structure */
4770   /*--------------------------------*/
4771   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4772 
4773   for (proc=0, k=0; proc<size; proc++) {
4774     if (!len_s[proc]) continue;
4775     i    = owners[proc];
4776     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4777     k++;
4778   }
4779 
4780   /* receives and sends of j-structure are complete */
4781   /*------------------------------------------------*/
4782   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4783   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4784 
4785   /* send and recv i-structure */
4786   /*---------------------------*/
4787   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4788   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4789 
4790   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4791   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4792   for (proc=0,k=0; proc<size; proc++) {
4793     if (!len_s[proc]) continue;
4794     /* form outgoing message for i-structure:
4795          buf_si[0]:                 nrows to be sent
4796                [1:nrows]:           row index (global)
4797                [nrows+1:2*nrows+1]: i-structure index
4798     */
4799     /*-------------------------------------------*/
4800     nrows       = len_si[proc]/2 - 1;
4801     buf_si_i    = buf_si + nrows+1;
4802     buf_si[0]   = nrows;
4803     buf_si_i[0] = 0;
4804     nrows       = 0;
4805     for (i=owners[proc]; i<owners[proc+1]; i++) {
4806       anzi = ai[i+1] - ai[i];
4807       if (anzi) {
4808         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4809         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4810         nrows++;
4811       }
4812     }
4813     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4814     k++;
4815     buf_si += len_si[proc];
4816   }
4817 
4818   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4819   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4820 
4821   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4822   for (i=0; i<merge->nrecv; i++) {
4823     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4824   }
4825 
4826   ierr = PetscFree(len_si);CHKERRQ(ierr);
4827   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4828   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4829   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4830   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4831   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4832   ierr = PetscFree(status);CHKERRQ(ierr);
4833 
4834   /* compute a local seq matrix in each processor */
4835   /*----------------------------------------------*/
4836   /* allocate bi array and free space for accumulating nonzero column info */
4837   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4838   bi[0] = 0;
4839 
4840   /* create and initialize a linked list */
4841   nlnk = N+1;
4842   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4843 
4844   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4845   len  = ai[owners[rank+1]] - ai[owners[rank]];
4846   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4847 
4848   current_space = free_space;
4849 
4850   /* determine symbolic info for each local row */
4851   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4852 
4853   for (k=0; k<merge->nrecv; k++) {
4854     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4855     nrows       = *buf_ri_k[k];
4856     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4857     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4858   }
4859 
4860   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4861   len  = 0;
4862   for (i=0; i<m; i++) {
4863     bnzi = 0;
4864     /* add local non-zero cols of this proc's seqmat into lnk */
4865     arow  = owners[rank] + i;
4866     anzi  = ai[arow+1] - ai[arow];
4867     aj    = a->j + ai[arow];
4868     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4869     bnzi += nlnk;
4870     /* add received col data into lnk */
4871     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4872       if (i == *nextrow[k]) { /* i-th row */
4873         anzi  = *(nextai[k]+1) - *nextai[k];
4874         aj    = buf_rj[k] + *nextai[k];
4875         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4876         bnzi += nlnk;
4877         nextrow[k]++; nextai[k]++;
4878       }
4879     }
4880     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4881 
4882     /* if free space is not available, make more free space */
4883     if (current_space->local_remaining<bnzi) {
4884       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4885       nspacedouble++;
4886     }
4887     /* copy data into free space, then initialize lnk */
4888     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4889     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4890 
4891     current_space->array           += bnzi;
4892     current_space->local_used      += bnzi;
4893     current_space->local_remaining -= bnzi;
4894 
4895     bi[i+1] = bi[i] + bnzi;
4896   }
4897 
4898   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4899 
4900   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4901   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4902   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4903 
4904   /* create symbolic parallel matrix B_mpi */
4905   /*---------------------------------------*/
4906   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4907   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4908   if (n==PETSC_DECIDE) {
4909     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4910   } else {
4911     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4912   }
4913   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4914   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4915   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4916   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4917   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4918 
4919   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4920   B_mpi->assembled    = PETSC_FALSE;
4921   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4922   merge->bi           = bi;
4923   merge->bj           = bj;
4924   merge->buf_ri       = buf_ri;
4925   merge->buf_rj       = buf_rj;
4926   merge->coi          = PETSC_NULL;
4927   merge->coj          = PETSC_NULL;
4928   merge->owners_co    = PETSC_NULL;
4929 
4930   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4931 
4932   /* attach the supporting struct to B_mpi for reuse */
4933   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4934   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4935   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4936   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4937   *mpimat = B_mpi;
4938 
4939   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4940   PetscFunctionReturn(0);
4941 }
4942 
4943 #undef __FUNCT__
4944 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4945 /*@C
4946       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4947                  matrices from each processor
4948 
4949     Collective on MPI_Comm
4950 
4951    Input Parameters:
4952 +    comm - the communicator that the parallel matrix will live on
4953 .    seqmat - the input sequential matrices
4954 .    m - number of local rows (or PETSC_DECIDE)
4955 .    n - number of local columns (or PETSC_DECIDE)
4956 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4957 
4958    Output Parameter:
4959 .    mpimat - the parallel matrix generated
4960 
4961     Level: advanced
4962 
4963    Notes:
4964      The dimensions of the sequential matrix in each processor MUST be the same.
4965      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4966      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
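
   For illustration, a minimal calling sketch (Aseq is a hypothetical SeqAIJ matrix with identical
   dimensions on every process, and ierr is assumed declared) might be:

.vb
     Mat Asum;

     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asum);CHKERRQ(ierr);
.ve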
4967 @*/
4968 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4969 {
4970   PetscErrorCode ierr;
4971   PetscMPIInt    size;
4972 
4973   PetscFunctionBegin;
4974   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4975   if (size == 1) {
4976     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4977     if (scall == MAT_INITIAL_MATRIX) {
4978       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4979     } else {
4980       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4981     }
4982     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4983     PetscFunctionReturn(0);
4984   }
4985   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4986   if (scall == MAT_INITIAL_MATRIX) {
4987     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4988   }
4989   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4990   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4991   PetscFunctionReturn(0);
4992 }
4993 
4994 #undef __FUNCT__
4995 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4996 /*@
4997      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4998           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4999           with MatGetSize()
5000 
5001     Not Collective
5002 
5003    Input Parameters:
5004 +    A - the matrix
5005 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5006 
5007    Output Parameter:
5008 .    A_loc - the local sequential matrix generated
5009 
5010     Level: developer
5011 
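   For illustration, a minimal usage sketch (A is an assembled MPIAIJ matrix and ierr is assumed
   declared; the generated matrix is destroyed by the caller) might be:

.vb
     Mat A_loc;

     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
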
5012 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5013 
5014 @*/
5015 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5016 {
5017   PetscErrorCode ierr;
5018   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5019   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5020   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5021   MatScalar      *aa=a->a,*ba=b->a,*cam;
5022   PetscScalar    *ca;
5023   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5024   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5025   PetscBool      match;
5026 
5027   PetscFunctionBegin;
5028   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5029   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5030   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5031   if (scall == MAT_INITIAL_MATRIX) {
5032     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5033     ci[0] = 0;
5034     for (i=0; i<am; i++) {
5035       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5036     }
5037     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5038     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5039     k    = 0;
5040     for (i=0; i<am; i++) {
5041       ncols_o = bi[i+1] - bi[i];
5042       ncols_d = ai[i+1] - ai[i];
5043       /* off-diagonal portion of A */
5044       for (jo=0; jo<ncols_o; jo++) {
5045         col = cmap[*bj];
5046         if (col >= cstart) break;
5047         cj[k]   = col; bj++;
5048         ca[k++] = *ba++;
5049       }
5050       /* diagonal portion of A */
5051       for (j=0; j<ncols_d; j++) {
5052         cj[k]   = cstart + *aj++;
5053         ca[k++] = *aa++;
5054       }
5055       /* off-diagonal portion of A */
5056       for (j=jo; j<ncols_o; j++) {
5057         cj[k]   = cmap[*bj++];
5058         ca[k++] = *ba++;
5059       }
5060     }
5061     /* put together the new matrix */
5062     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5063     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5064     /* Since these are PETSc arrays, change flags to free them as necessary. */
5065     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5066     mat->free_a  = PETSC_TRUE;
5067     mat->free_ij = PETSC_TRUE;
5068     mat->nonew   = 0;
5069   } else if (scall == MAT_REUSE_MATRIX) {
5070     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5071     ci = mat->i; cj = mat->j; cam = mat->a;
5072     for (i=0; i<am; i++) {
5073       /* off-diagonal portion of A */
5074       ncols_o = bi[i+1] - bi[i];
5075       for (jo=0; jo<ncols_o; jo++) {
5076         col = cmap[*bj];
5077         if (col >= cstart) break;
5078         *cam++ = *ba++; bj++;
5079       }
5080       /* diagonal portion of A */
5081       ncols_d = ai[i+1] - ai[i];
5082       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5083       /* off-diagonal portion of A */
5084       for (j=jo; j<ncols_o; j++) {
5085         *cam++ = *ba++; bj++;
5086       }
5087     }
5088   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5089   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5090   PetscFunctionReturn(0);
5091 }
5092 
5093 #undef __FUNCT__
5094 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5095 /*@C
5096      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5097 
5098     Not Collective
5099 
5100    Input Parameters:
5101 +    A - the matrix
5102 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5103 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5104 
5105    Output Parameter:
5106 .    A_loc - the local sequential matrix generated
5107 
5108     Level: developer
5109 
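   A minimal usage sketch (passing PETSC_NULL for row and col so the routine selects the local rows
   and nonzero columns itself; A and ierr are assumed declared) might be:

.vb
     Mat A_loc;

     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
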
5110 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5111 
5112 @*/
5113 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5114 {
5115   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5116   PetscErrorCode ierr;
5117   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5118   IS             isrowa,iscola;
5119   Mat            *aloc;
5120   PetscBool      match;
5121 
5122   PetscFunctionBegin;
5123   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5124   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5125   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5126   if (!row) {
5127     start = A->rmap->rstart; end = A->rmap->rend;
5128     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5129   } else {
5130     isrowa = *row;
5131   }
5132   if (!col) {
5133     start = A->cmap->rstart;
5134     cmap  = a->garray;
5135     nzA   = a->A->cmap->n;
5136     nzB   = a->B->cmap->n;
5137     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5138     ncols = 0;
5139     for (i=0; i<nzB; i++) {
5140       if (cmap[i] < start) idx[ncols++] = cmap[i];
5141       else break;
5142     }
5143     imark = i;
5144     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5145     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5146     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5147   } else {
5148     iscola = *col;
5149   }
5150   if (scall != MAT_INITIAL_MATRIX) {
5151     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5152     aloc[0] = *A_loc;
5153   }
5154   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5155   *A_loc = aloc[0];
5156   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5157   if (!row) {
5158     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5159   }
5160   if (!col) {
5161     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5162   }
5163   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5164   PetscFunctionReturn(0);
5165 }
5166 
5167 #undef __FUNCT__
5168 #define __FUNCT__ "MatGetBrowsOfAcols"
5169 /*@C
5170     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5171 
5172     Collective on Mat
5173 
5174    Input Parameters:
5175 +    A,B - the matrices in mpiaij format
5176 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5177 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5178 
5179    Output Parameter:
5180 +    rowb, colb - index sets of rows and columns of B to extract
5181 -    B_seq - the sequential matrix generated
5182 
5183     Level: developer
5184 
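   A minimal usage sketch (with MAT_INITIAL_MATRIX the routine creates the index sets and the
   sequential matrix; A, B, and ierr are assumed declared) might be:

.vb
     IS  rowb,colb;
     Mat B_seq;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
.ve
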
5185 @*/
5186 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5187 {
5188   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5189   PetscErrorCode ierr;
5190   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5191   IS             isrowb,iscolb;
5192   Mat            *bseq=PETSC_NULL;
5193 
5194   PetscFunctionBegin;
5195   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5196     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5197   }
5198   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5199 
5200   if (scall == MAT_INITIAL_MATRIX) {
5201     start = A->cmap->rstart;
5202     cmap  = a->garray;
5203     nzA   = a->A->cmap->n;
5204     nzB   = a->B->cmap->n;
5205     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5206     ncols = 0;
5207     for (i=0; i<nzB; i++) {  /* row < local row index */
5208       if (cmap[i] < start) idx[ncols++] = cmap[i];
5209       else break;
5210     }
5211     imark = i;
5212     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5213     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5214     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5215     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5216   } else {
5217     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5218     isrowb  = *rowb; iscolb = *colb;
5219     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5220     bseq[0] = *B_seq;
5221   }
5222   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5223   *B_seq = bseq[0];
5224   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5225   if (!rowb) {
5226     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5227   } else {
5228     *rowb = isrowb;
5229   }
5230   if (!colb) {
5231     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5232   } else {
5233     *colb = iscolb;
5234   }
5235   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5236   PetscFunctionReturn(0);
5237 }
5238 
5239 #undef __FUNCT__
5240 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5241 /*
5242     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5243     of the OFF-DIAGONAL portion of the local part of A
5244 
5245     Collective on Mat
5246 
5247    Input Parameters:
5248 +    A,B - the matrices in mpiaij format
5249 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5250 
5251    Output Parameter:
5252 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5253 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5254 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5255 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5256 
5257     Level: developer
5258 
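   Usage sketch (illustrative only; the parallel MatMatMult kernels that call this routine
   follow this pattern, and the variable names below are placeholders):

     PetscInt  *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa      = PETSC_NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ...   B's numerical values change, its nonzero structure does not   ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
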
5259 */
5260 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5261 {
5262   VecScatter_MPI_General *gen_to,*gen_from;
5263   PetscErrorCode         ierr;
5264   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5265   Mat_SeqAIJ             *b_oth;
5266   VecScatter             ctx =a->Mvctx;
5267   MPI_Comm               comm=((PetscObject)ctx)->comm;
5268   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5269   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5270   PetscScalar            *rvalues,*svalues;
5271   MatScalar              *b_otha,*bufa,*bufA;
5272   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5273   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5274   MPI_Status             *sstatus,rstatus;
5275   PetscMPIInt            jj;
5276   PetscInt               *cols,sbs,rbs;
5277   PetscScalar            *vals;
5278 
5279   PetscFunctionBegin;
5280   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5281     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5282   }
5283   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5284   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5285 
5286   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5287   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5288   rvalues  = gen_from->values; /* reused as a buffer for the lengths of the rows to be received */
5289   svalues  = gen_to->values;   /* reused as a buffer for the lengths of the rows to be sent */
5290   nrecvs   = gen_from->n;
5291   nsends   = gen_to->n;
5292 
5293   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5294   srow    = gen_to->indices;    /* local row index to be sent */
5295   sstarts = gen_to->starts;
5296   sprocs  = gen_to->procs;
5297   sstatus = gen_to->sstatus;
5298   sbs     = gen_to->bs;
5299   rstarts = gen_from->starts;
5300   rprocs  = gen_from->procs;
5301   rbs     = gen_from->bs;
5302 
5303   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5304   if (scall == MAT_INITIAL_MATRIX) {
5305     /* i-array */
5306     /*---------*/
5307     /*  post receives */
5308     for (i=0; i<nrecvs; i++) {
5309       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5310       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5311       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5312     }
5313 
5314     /* pack the outgoing message */
5315     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5316 
5317     sstartsj[0] = 0;
5318     rstartsj[0] = 0;
5319     len         = 0; /* total length of j or a array to be sent */
5320     k           = 0;
5321     for (i=0; i<nsends; i++) {
5322       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5323       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5324       for (j=0; j<nrows; j++) {
5325         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5326         for (l=0; l<sbs; l++) {
5327           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5328 
5329           rowlen[j*sbs+l] = ncols;
5330 
5331           len += ncols;
5332           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5333         }
5334         k++;
5335       }
5336       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5337 
5338       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5339     }
5340     /* recvs and sends of i-array are completed */
5341     i = nrecvs;
5342     while (i--) {
5343       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5344     }
5345     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5346 
5347     /* allocate buffers for sending j and a arrays */
5348     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5349     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5350 
5351     /* create i-array of B_oth */
5352     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5353 
5354     b_othi[0] = 0;
5355     len       = 0; /* total length of j or a array to be received */
5356     k         = 0;
5357     for (i=0; i<nrecvs; i++) {
5358       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5359       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5360       for (j=0; j<nrows; j++) {
5361         b_othi[k+1] = b_othi[k] + rowlen[j];
5362         len        += rowlen[j]; k++;
5363       }
5364       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5365     }
5366 
5367     /* allocate space for the j and a arrays of B_oth */
5368     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5369     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5370 
5371     /* j-array */
5372     /*---------*/
5373     /*  post receives of j-array */
5374     for (i=0; i<nrecvs; i++) {
5375       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5376       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5377     }
5378 
5379     /* pack the outgoing message j-array */
5380     k = 0;
5381     for (i=0; i<nsends; i++) {
5382       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5383       bufJ  = bufj+sstartsj[i];
5384       for (j=0; j<nrows; j++) {
5385         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5386         for (ll=0; ll<sbs; ll++) {
5387           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5388           for (l=0; l<ncols; l++) {
5389             *bufJ++ = cols[l];
5390           }
5391           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5392         }
5393       }
5394       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5395     }
5396 
5397     /* recvs and sends of j-array are completed */
5398     i = nrecvs;
5399     while (i--) {
5400       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5401     }
5402     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5403   } else if (scall == MAT_REUSE_MATRIX) {
5404     sstartsj = *startsj_s;
5405     rstartsj = *startsj_r;
5406     bufa     = *bufa_ptr;
5407     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5408     b_otha   = b_oth->a;
5409   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Unsupported MatReuse value");
5410 
5411   /* a-array */
5412   /*---------*/
5413   /*  post receives of a-array */
5414   for (i=0; i<nrecvs; i++) {
5415     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5416     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5417   }
5418 
5419   /* pack the outgoing message a-array */
5420   k = 0;
5421   for (i=0; i<nsends; i++) {
5422     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5423     bufA  = bufa+sstartsj[i];
5424     for (j=0; j<nrows; j++) {
5425       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5426       for (ll=0; ll<sbs; ll++) {
5427         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5428         for (l=0; l<ncols; l++) {
5429           *bufA++ = vals[l];
5430         }
5431         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5432       }
5433     }
5434     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5435   }
5436   /* recvs and sends of a-array are completed */
5437   i = nrecvs;
5438   while (i--) {
5439     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5440   }
5441   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5442   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5443 
5444   if (scall == MAT_INITIAL_MATRIX) {
5445     /* put together the new matrix */
5446     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5447 
5448     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5449     /* Since these are PETSc arrays, change flags to free them as necessary. */
5450     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5451     b_oth->free_a  = PETSC_TRUE;
5452     b_oth->free_ij = PETSC_TRUE;
5453     b_oth->nonew   = 0;
5454 
5455     ierr = PetscFree(bufj);CHKERRQ(ierr);
5456     if (!startsj_s || !bufa_ptr) {
5457       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5458       ierr = PetscFree(bufa);CHKERRQ(ierr);
5459     } else {
5460       *startsj_s = sstartsj;
5461       *startsj_r = rstartsj;
5462       *bufa_ptr  = bufa;
5463     }
5464   }
5465   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5466   PetscFunctionReturn(0);
5467 }
5468 
5469 #undef __FUNCT__
5470 #define __FUNCT__ "MatGetCommunicationStructs"
5471 /*@C
5472   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5473 
5474   Not Collective
5475 
5476   Input Parameters:
5477 . A - The matrix in mpiaij format
5478 
5479   Output Parameter:
5480 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5481 . colmap - A map from global column index to local index into lvec
5482 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5483 
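  Example usage (a sketch; assumes A is an assembled MATMPIAIJ matrix and PETSc was
  configured without PETSC_USE_CTABLE, so colmap is returned as a PetscInt array; the
  returned objects are owned by A and must not be destroyed by the caller):
.vb
     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
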
5484   Level: developer
5485 
5486 @*/
5487 #if defined(PETSC_USE_CTABLE)
5488 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5489 #else
5490 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5491 #endif
5492 {
5493   Mat_MPIAIJ *a;
5494 
5495   PetscFunctionBegin;
5496   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5497   PetscValidPointer(lvec, 2);
5498   PetscValidPointer(colmap, 3);
5499   PetscValidPointer(multScatter, 4);
5500   a = (Mat_MPIAIJ*) A->data;
5501   if (lvec) *lvec = a->lvec;
5502   if (colmap) *colmap = a->colmap;
5503   if (multScatter) *multScatter = a->Mvctx;
5504   PetscFunctionReturn(0);
5505 }
5506 
5507 EXTERN_C_BEGIN
5508 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5509 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5510 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5511 EXTERN_C_END
5512 
5513 #undef __FUNCT__
5514 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5515 /*
5516     Computes C = A*B as C = (B'*A')', since forming the MPIDense*MPIAIJ product A*B directly is untenable
5517 
5518                n                       p                          p
5519         (              )       (              )         (                  )
5520       m (      A       )  *  n (       B      )   =   m (         C        )
5521         (              )       (              )         (                  )
5522 
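    A usage sketch (illustrative; A is MATMPIDENSE, B is MATMPIAIJ, and the product is
    normally requested through the composed MatMatMult() interface rather than by calling
    this routine directly):

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatDestroy(&C);CHKERRQ(ierr);
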
5523 */
5524 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5525 {
5526   PetscErrorCode ierr;
5527   Mat            At,Bt,Ct;
5528 
5529   PetscFunctionBegin;
5530   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5531   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5532   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5533   ierr = MatDestroy(&At);CHKERRQ(ierr);
5534   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5535   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5536   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5537   PetscFunctionReturn(0);
5538 }
5539 
5540 #undef __FUNCT__
5541 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5542 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5543 {
5544   PetscErrorCode ierr;
5545   PetscInt       m=A->rmap->n,n=B->cmap->n;
5546   Mat            Cmat;
5547 
5548   PetscFunctionBegin;
5549   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5550   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5551   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5552   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5553   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5554   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5555   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5556   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5557 
5558   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5559 
5560   *C = Cmat;
5561   PetscFunctionReturn(0);
5562 }
5563 
5564 /* ----------------------------------------------------------------*/
5565 #undef __FUNCT__
5566 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5567 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5568 {
5569   PetscErrorCode ierr;
5570 
5571   PetscFunctionBegin;
5572   if (scall == MAT_INITIAL_MATRIX) {
5573     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5574   }
5575   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5576   PetscFunctionReturn(0);
5577 }
5578 
5579 EXTERN_C_BEGIN
5580 #if defined(PETSC_HAVE_MUMPS)
5581 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5582 #endif
5583 #if defined(PETSC_HAVE_PASTIX)
5584 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5585 #endif
5586 #if defined(PETSC_HAVE_SUPERLU_DIST)
5587 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5588 #endif
5589 #if defined(PETSC_HAVE_CLIQUE)
5590 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5591 #endif
5592 EXTERN_C_END
5593 
5594 /*MC
5595    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5596 
5597    Options Database Keys:
5598 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5599 
5600   Level: beginner
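   Example usage (a sketch; comm, m, n, M, N, d_nz, and o_nz are values chosen by the caller):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
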
5601 
5602 .seealso: MatCreateAIJ()
5603 M*/
5604 
5605 EXTERN_C_BEGIN
5606 #undef __FUNCT__
5607 #define __FUNCT__ "MatCreate_MPIAIJ"
5608 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5609 {
5610   Mat_MPIAIJ     *b;
5611   PetscErrorCode ierr;
5612   PetscMPIInt    size;
5613 
5614   PetscFunctionBegin;
5615   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5616 
5617   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5618   B->data       = (void*)b;
5619   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5620   B->assembled  = PETSC_FALSE;
5621   B->insertmode = NOT_SET_VALUES;
5622   b->size       = size;
5623 
5624   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5625 
5626   /* build cache for off array entries formed */
5627   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5628 
5629   b->donotstash  = PETSC_FALSE;
5630   b->colmap      = 0;
5631   b->garray      = 0;
5632   b->roworiented = PETSC_TRUE;
5633 
5634   /* stuff used for matrix vector multiply */
5635   b->lvec  = PETSC_NULL;
5636   b->Mvctx = PETSC_NULL;
5637 
5638   /* stuff for MatGetRow() */
5639   b->rowindices   = 0;
5640   b->rowvalues    = 0;
5641   b->getrowactive = PETSC_FALSE;
5642 
5643   /* flexible pointer used in CUSP/CUSPARSE classes */
5644   b->spptr = PETSC_NULL;
5645 
5646 #if defined(PETSC_HAVE_MUMPS)
5647   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5648                                            "MatGetFactor_aij_mumps",
5649                                            MatGetFactor_aij_mumps);CHKERRQ(ierr);
5650 #endif
5651 #if defined(PETSC_HAVE_PASTIX)
5652   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5653                                            "MatGetFactor_mpiaij_pastix",
5654                                            MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5655 #endif
5656 #if defined(PETSC_HAVE_SUPERLU_DIST)
5657   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5658                                            "MatGetFactor_mpiaij_superlu_dist",
5659                                            MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5660 #endif
5661 #if defined(PETSC_HAVE_CLIQUE)
5662   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5663                                            "MatGetFactor_aij_clique",
5664                                            MatGetFactor_aij_clique);CHKERRQ(ierr);
5665 #endif
5666   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5667                                            "MatStoreValues_MPIAIJ",
5668                                            MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5669   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5670                                            "MatRetrieveValues_MPIAIJ",
5671                                            MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5672   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5673                                            "MatGetDiagonalBlock_MPIAIJ",
5674                                            MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5675   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5676                                            "MatIsTranspose_MPIAIJ",
5677                                            MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5678   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5679                                            "MatMPIAIJSetPreallocation_MPIAIJ",
5680                                            MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5681   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5682                                            "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5683                                            MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5684   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5685                                            "MatDiagonalScaleLocal_MPIAIJ",
5686                                            MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5687   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5688                                            "MatConvert_MPIAIJ_MPIAIJPERM",
5689                                            MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5690   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5691                                            "MatConvert_MPIAIJ_MPIAIJCRL",
5692                                            MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5693   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5694                                            "MatConvert_MPIAIJ_MPISBAIJ",
5695                                            MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5696   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5697                                            "MatMatMult_MPIDense_MPIAIJ",
5698                                            MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5699   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5700                                            "MatMatMultSymbolic_MPIDense_MPIAIJ",
5701                                            MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5702   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5703                                            "MatMatMultNumeric_MPIDense_MPIAIJ",
5704                                            MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5705   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5706   PetscFunctionReturn(0);
5707 }
5708 EXTERN_C_END
5709 
5710 #undef __FUNCT__
5711 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5712 /*@
5713      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5714          and "off-diagonal" part of the matrix in CSR format.
5715 
5716    Collective on MPI_Comm
5717 
5718    Input Parameters:
5719 +  comm - MPI communicator
5720 .  m - number of local rows (Cannot be PETSC_DECIDE)
5721 .  n - This value should be the same as the local size used in creating the
5722        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5723        calculated if N is given). For square matrices n is almost always m.
5724 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5725 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5726 .   i - row indices for "diagonal" portion of matrix
5727 .   j - column indices
5728 .   a - matrix values
5729 .   oi - row indices for "off-diagonal" portion of matrix
5730 .   oj - column indices
5731 -   oa - matrix values
5732 
5733    Output Parameter:
5734 .   mat - the matrix
5735 
5736    Level: advanced
5737 
5738    Notes:
5739        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5740        must free the arrays once the matrix has been destroyed, and not before.
5741 
5742        The i and j indices are 0 based
5743 
5744        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5745 
5746        This sets local rows and cannot be used to set off-processor values.
5747 
5748        You cannot later use MatSetValues() to change values in this matrix.
5749 
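   Example usage (a sketch; the CSR arrays i,j,a for the "diagonal" block and oi,oj,oa for
   the "off-diagonal" block are assumed to have been built by the caller, with j in the local
   column numbering of the diagonal block and oj in global column numbering; the arrays may
   be freed only after the matrix is destroyed):
.vb
     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
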
5750 .keywords: matrix, aij, compressed row, sparse, parallel
5751 
5752 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5753           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5754 @*/
5755 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5756 {
5757   PetscErrorCode ierr;
5758   Mat_MPIAIJ     *maij;
5759 
5760   PetscFunctionBegin;
5761   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5762   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5763   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5764   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5765   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5766   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5767   maij = (Mat_MPIAIJ*) (*mat)->data;
5768 
5769   maij->donotstash     = PETSC_TRUE;
5770   (*mat)->preallocated = PETSC_TRUE;
5771 
5772   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5773   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5774 
5775   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5776   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5777 
5778   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5779   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5780   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5781   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5782 
5783   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5784   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5785   PetscFunctionReturn(0);
5786 }
5787 
5788 /*
5789     Special version for direct calls from Fortran
5790 */
5791 #include <petsc-private/fortranimpl.h>
5792 
5793 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5794 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5795 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5796 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5797 #endif
5798 
5799 /* Change these macros so they can be used in a void function */
5800 #undef CHKERRQ
5801 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5802 #undef SETERRQ2
5803 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5804 #undef SETERRQ3
5805 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5806 #undef SETERRQ
5807 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5808 
5809 EXTERN_C_BEGIN
5810 #undef __FUNCT__
5811 #define __FUNCT__ "matsetvaluesmpiaij_"
5812 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5813 {
5814   Mat            mat  = *mmat;
5815   PetscInt       m    = *mm, n = *mn;
5816   InsertMode     addv = *maddv;
5817   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5818   PetscScalar    value;
5819   PetscErrorCode ierr;
5820 
5821   MatCheckPreallocated(mat,1);
5822   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5823 
5824 #if defined(PETSC_USE_DEBUG)
5825   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5826 #endif
5827   {
5828     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5829     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5830     PetscBool roworiented = aij->roworiented;
5831 
5832     /* Some Variables required in the macro */
5833     Mat        A                 = aij->A;
5834     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5835     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5836     MatScalar  *aa               = a->a;
5837     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5838     Mat        B                 = aij->B;
5839     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5840     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5841     MatScalar  *ba               = b->a;
5842 
5843     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5844     PetscInt  nonew = a->nonew;
5845     MatScalar *ap1,*ap2;
5846 
5847     PetscFunctionBegin;
5848     for (i=0; i<m; i++) {
5849       if (im[i] < 0) continue;
5850 #if defined(PETSC_USE_DEBUG)
5851       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5852 #endif
5853       if (im[i] >= rstart && im[i] < rend) {
5854         row      = im[i] - rstart;
5855         lastcol1 = -1;
5856         rp1      = aj + ai[row];
5857         ap1      = aa + ai[row];
5858         rmax1    = aimax[row];
5859         nrow1    = ailen[row];
5860         low1     = 0;
5861         high1    = nrow1;
5862         lastcol2 = -1;
5863         rp2      = bj + bi[row];
5864         ap2      = ba + bi[row];
5865         rmax2    = bimax[row];
5866         nrow2    = bilen[row];
5867         low2     = 0;
5868         high2    = nrow2;
5869 
5870         for (j=0; j<n; j++) {
5871           if (roworiented) value = v[i*n+j];
5872           else value = v[i+j*m];
5873           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5874           if (in[j] >= cstart && in[j] < cend) {
5875             col = in[j] - cstart;
5876             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5877           } else if (in[j] < 0) continue;
5878 #if defined(PETSC_USE_DEBUG)
5879           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5880 #endif
5881           else {
5882             if (mat->was_assembled) {
5883               if (!aij->colmap) {
5884                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5885               }
5886 #if defined(PETSC_USE_CTABLE)
5887               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5888               col--;
5889 #else
5890               col = aij->colmap[in[j]] - 1;
5891 #endif
5892               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5893                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5894                 col  =  in[j];
5895                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5896                 B     = aij->B;
5897                 b     = (Mat_SeqAIJ*)B->data;
5898                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5899                 rp2   = bj + bi[row];
5900                 ap2   = ba + bi[row];
5901                 rmax2 = bimax[row];
5902                 nrow2 = bilen[row];
5903                 low2  = 0;
5904                 high2 = nrow2;
5905                 bm    = aij->B->rmap->n;
5906                 ba    = b->a;
5907               }
5908             } else col = in[j];
5909             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5910           }
5911         }
5912       } else if (!aij->donotstash) {
5913         if (roworiented) {
5914           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5915         } else {
5916           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5917         }
5918       }
5919     }
5920   }
5921   PetscFunctionReturnVoid();
5922 }
5923 EXTERN_C_END
5924 
5925