xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 8cc058d9cd56c1ccb3be12a47760ddfc446aaffc)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 
6 /*MC
7    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
8 
9    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
10    and MATMPIAIJ otherwise.  As a result, for single process communicators,
11   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
12   for communicators controlling multiple processes.  It is recommended that you call both of
13   the above preallocation routines for simplicity.
14 
15    Options Database Keys:
16 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
17 
18   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The AIJ format also automatically switches over to using
19    inodes when enough exist.
20 
21   Level: beginner
22 
23 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
24 M*/
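/*
   Illustrative usage sketch (not part of this file's implementation): create an AIJ matrix and
   call both preallocation routines, as recommended above, so the same code works whether the
   communicator has one process or many.  The global size 100 and the per-row nonzero estimates
   (5 diagonal, 2 off-diagonal) are placeholder values.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);                          honors -mat_type aij
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);           used on a one-process communicator
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);    used on a multi-process communicator
*/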
25 
26 /*MC
27    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
28 
29    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
30    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
31    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
32   for communicators controlling multiple processes.  It is recommended that you call both of
33   the above preallocation routines for simplicity.
34 
35    Options Database Keys:
36 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
37 
38   Level: beginner
39 
40 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
41 M*/
42 
43 #undef __FUNCT__
44 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
45 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
46 {
47   PetscErrorCode  ierr;
48   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
49   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
50   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
51   const PetscInt  *ia,*ib;
52   const MatScalar *aa,*bb;
53   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
54   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
55 
56   PetscFunctionBegin;
57   *keptrows = 0;
58   ia        = a->i;
59   ib        = b->i;
60   for (i=0; i<m; i++) {
61     na = ia[i+1] - ia[i];
62     nb = ib[i+1] - ib[i];
63     if (!na && !nb) {
64       cnt++;
65       goto ok1;
66     }
67     aa = a->a + ia[i];
68     for (j=0; j<na; j++) {
69       if (aa[j] != 0.0) goto ok1;
70     }
71     bb = b->a + ib[i];
72     for (j=0; j <nb; j++) {
73       if (bb[j] != 0.0) goto ok1;
74     }
75     cnt++;
76 ok1:;
77   }
78   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
79   if (!n0rows) PetscFunctionReturn(0);
80   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
81   cnt  = 0;
82   for (i=0; i<m; i++) {
83     na = ia[i+1] - ia[i];
84     nb = ib[i+1] - ib[i];
85     if (!na && !nb) continue;
86     aa = a->a + ia[i];
87     for (j=0; j<na;j++) {
88       if (aa[j] != 0.0) {
89         rows[cnt++] = rstart + i;
90         goto ok2;
91       }
92     }
93     bb = b->a + ib[i];
94     for (j=0; j<nb; j++) {
95       if (bb[j] != 0.0) {
96         rows[cnt++] = rstart + i;
97         goto ok2;
98       }
99     }
100 ok2:;
101   }
102   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
108 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
109 {
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
111   PetscErrorCode ierr;
112   PetscInt       i,rstart,nrows,*rows;
113 
114   PetscFunctionBegin;
115   *zrows = NULL;
116   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
117   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
118   for (i=0; i<nrows; i++) rows[i] += rstart;
119   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
120   PetscFunctionReturn(0);
121 }
122 
123 #undef __FUNCT__
124 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
125 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
126 {
127   PetscErrorCode ierr;
128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
129   PetscInt       i,n,*garray = aij->garray;
130   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
131   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
132   PetscReal      *work;
133 
134   PetscFunctionBegin;
135   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
136   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
137   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 */
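/*
   Hedged calling sketch (illustrative only; "gseq" and "mlocal" are placeholder names): every
   process passes the same communicator and the number of local rows it should own; the matrix
   must be square, and in this routine the sequential matrix gseq is dereferenced only on rank 0.

     Mat dist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     later, after changing only the numerical values of gseq on rank 0:
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/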
181 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
182 {
183   PetscMPIInt    rank,size;
184   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
185   PetscErrorCode ierr;
186   Mat            mat;
187   Mat_SeqAIJ     *gmata;
188   PetscMPIInt    tag;
189   MPI_Status     status;
190   PetscBool      aij;
191   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
192 
193   PetscFunctionBegin;
194   CHKMEMQ;
195   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
196   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
197   if (!rank) {
198     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
199     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
200   }
201   if (reuse == MAT_INITIAL_MATRIX) {
202     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
203     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
204     if (!rank) {
205       bses[0] = gmat->rmap->bs;
206       bses[1] = gmat->cmap->bs;
207     }
208     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
209     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
210     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
211     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
212     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
213     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
214 
215     rowners[0] = 0;
216     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine the diagonal and off-diagonal nonzero counts for each row */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
231       jj   = 0;
232       for (i=0; i<m; i++) {
233         for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236           jj++;
237         }
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the diagonal and off-diagonal nonzero counts for each row */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj   = 0;
266       for (i=0; i<m; i++) {
267         for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270           jj++;
271         }
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298 
299     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
300 
301     *inmat = mat;
302   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
303     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
304     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
305     mat  = *inmat;
306     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
307     if (!rank) {
308       /* send numerical values to other processes */
309       gmata  = (Mat_SeqAIJ*) gmat->data;
310       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
311       gmataa = gmata->a;
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
315       }
316       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
317     } else {
318       /* receive numerical values from process 0*/
319       nz   = Ad->nz + Ao->nz;
320       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
321       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
322     }
323     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
324     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
325     ad = Ad->a;
326     ao = Ao->a;
327     if (mat->rmap->n) {
328       i  = 0;
329       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     for (i=1; i<mat->rmap->n; i++) {
333       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
334       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
335     }
336     i--;
337     if (mat->rmap->n) {
338       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
339     }
340     if (rank) {
341       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
342     }
343   }
344   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
345   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
346   CHKMEMQ;
347   PetscFunctionReturn(0);
348 }
349 
350 /*
351   Local utility routine that creates a mapping from the global column
352 number to the local number in the off-diagonal part of the local
353 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
354 a slightly higher hash table cost; without it, it is not scalable (each process
355 has an order N integer array) but access is fast.
356 */
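/*
   Sketch of the lookup convention used by the colmap built below (mirroring the code in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ(); "gcol" and "lcol" are placeholder names):
   entries are stored shifted by one, so a stored value of 0 (lcol == -1 after the shift)
   means the global column has no entry in the off-diagonal block B.

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif
     if (lcol < 0) { the column gcol is not present in B on this process }
*/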
357 #undef __FUNCT__
358 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
359 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
360 {
361   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
362   PetscErrorCode ierr;
363   PetscInt       n = aij->B->cmap->n,i;
364 
365   PetscFunctionBegin;
366   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
367 #if defined(PETSC_USE_CTABLE)
368   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
369   for (i=0; i<n; i++) {
370     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
371   }
372 #else
373   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
374   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
375   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
377 #endif
378   PetscFunctionReturn(0);
379 }
380 
381 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
382 { \
383     if (col <= lastcol1)  low1 = 0;     \
384     else                 high1 = nrow1; \
385     lastcol1 = col;\
386     while (high1-low1 > 5) { \
387       t = (low1+high1)/2; \
388       if (rp1[t] > col) high1 = t; \
389       else              low1  = t; \
390     } \
391       for (_i=low1; _i<high1; _i++) { \
392         if (rp1[_i] > col) break; \
393         if (rp1[_i] == col) { \
394           if (addv == ADD_VALUES) ap1[_i] += value;   \
395           else                    ap1[_i] = value; \
396           goto a_noinsert; \
397         } \
398       }  \
399       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
400       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
401       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
402       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
403       N = nrow1++ - 1; a->nz++; high1++; \
404       /* shift up all the later entries in this row */ \
405       for (ii=N; ii>=_i; ii--) { \
406         rp1[ii+1] = rp1[ii]; \
407         ap1[ii+1] = ap1[ii]; \
408       } \
409       rp1[_i] = col;  \
410       ap1[_i] = value;  \
411       a_noinsert: ; \
412       ailen[row] = nrow1; \
413 }
414 
415 
416 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
417   { \
418     if (col <= lastcol2) low2 = 0;                        \
419     else high2 = nrow2;                                   \
420     lastcol2 = col;                                       \
421     while (high2-low2 > 5) {                              \
422       t = (low2+high2)/2;                                 \
423       if (rp2[t] > col) high2 = t;                        \
424       else             low2  = t;                         \
425     }                                                     \
426     for (_i=low2; _i<high2; _i++) {                       \
427       if (rp2[_i] > col) break;                           \
428       if (rp2[_i] == col) {                               \
429         if (addv == ADD_VALUES) ap2[_i] += value;         \
430         else                    ap2[_i] = value;          \
431         goto b_noinsert;                                  \
432       }                                                   \
433     }                                                     \
434     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
435     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
436     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
437     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
438     N = nrow2++ - 1; b->nz++; high2++;                    \
439     /* shift up all the later entries in this row */      \
440     for (ii=N; ii>=_i; ii--) {                            \
441       rp2[ii+1] = rp2[ii];                                \
442       ap2[ii+1] = ap2[ii];                                \
443     }                                                     \
444     rp2[_i] = col;                                        \
445     ap2[_i] = value;                                      \
446     b_noinsert: ;                                         \
447     bilen[row] = nrow2;                                   \
448   }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
452 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
453 {
454   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
455   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
456   PetscErrorCode ierr;
457   PetscInt       l,*garray = mat->garray,diag;
458 
459   PetscFunctionBegin;
460   /* code only works for square matrices A */
461 
462   /* find size of row to the left of the diagonal part */
463   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
464   row  = row - diag;
465   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
466     if (garray[b->j[b->i[row]+l]] > diag) break;
467   }
468   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
469 
470   /* diagonal part */
471   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
472 
473   /* right of diagonal part */
474   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatSetValues_MPIAIJ"
480 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
481 {
482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
483   PetscScalar    value;
484   PetscErrorCode ierr;
485   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
486   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
487   PetscBool      roworiented = aij->roworiented;
488 
489   /* Some Variables required in the macro */
490   Mat        A                 = aij->A;
491   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
492   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
493   MatScalar  *aa               = a->a;
494   PetscBool  ignorezeroentries = a->ignorezeroentries;
495   Mat        B                 = aij->B;
496   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
497   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
498   MatScalar  *ba               = b->a;
499 
500   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
501   PetscInt  nonew;
502   MatScalar *ap1,*ap2;
503 
504   PetscFunctionBegin;
505   if (v) PetscValidScalarPointer(v,6);
506   for (i=0; i<m; i++) {
507     if (im[i] < 0) continue;
508 #if defined(PETSC_USE_DEBUG)
509     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
510 #endif
511     if (im[i] >= rstart && im[i] < rend) {
512       row      = im[i] - rstart;
513       lastcol1 = -1;
514       rp1      = aj + ai[row];
515       ap1      = aa + ai[row];
516       rmax1    = aimax[row];
517       nrow1    = ailen[row];
518       low1     = 0;
519       high1    = nrow1;
520       lastcol2 = -1;
521       rp2      = bj + bi[row];
522       ap2      = ba + bi[row];
523       rmax2    = bimax[row];
524       nrow2    = bilen[row];
525       low2     = 0;
526       high2    = nrow2;
527 
528       for (j=0; j<n; j++) {
529         if (v) {
530           if (roworiented) value = v[i*n+j];
531           else             value = v[i+j*m];
532         } else value = 0.0;
533         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
534         if (in[j] >= cstart && in[j] < cend) {
535           col   = in[j] - cstart;
536           nonew = a->nonew;
537           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
538         } else if (in[j] < 0) continue;
539 #if defined(PETSC_USE_DEBUG)
540         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
541 #endif
542         else {
543           if (mat->was_assembled) {
544             if (!aij->colmap) {
545               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
546             }
547 #if defined(PETSC_USE_CTABLE)
548             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
549             col--;
550 #else
551             col = aij->colmap[in[j]] - 1;
552 #endif
553             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
554               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
555               col  =  in[j];
556               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
557               B     = aij->B;
558               b     = (Mat_SeqAIJ*)B->data;
559               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
560               rp2   = bj + bi[row];
561               ap2   = ba + bi[row];
562               rmax2 = bimax[row];
563               nrow2 = bilen[row];
564               low2  = 0;
565               high2 = nrow2;
566               bm    = aij->B->rmap->n;
567               ba    = b->a;
568             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
569           } else col = in[j];
570           nonew = b->nonew;
571           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
572         }
573       }
574     } else {
575       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
576       if (!aij->donotstash) {
577         mat->assembled = PETSC_FALSE;
578         if (roworiented) {
579           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
580         } else {
581           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
582         }
583       }
584     }
585   }
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatGetValues_MPIAIJ"
591 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
592 {
593   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
594   PetscErrorCode ierr;
595   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
596   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
597 
598   PetscFunctionBegin;
599   for (i=0; i<m; i++) {
600     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
601     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
602     if (idxm[i] >= rstart && idxm[i] < rend) {
603       row = idxm[i] - rstart;
604       for (j=0; j<n; j++) {
605         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
606         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
607         if (idxn[j] >= cstart && idxn[j] < cend) {
608           col  = idxn[j] - cstart;
609           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
610         } else {
611           if (!aij->colmap) {
612             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
613           }
614 #if defined(PETSC_USE_CTABLE)
615           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
616           col--;
617 #else
618           col = aij->colmap[idxn[j]] - 1;
619 #endif
620           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
621           else {
622             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
623           }
624         }
625       }
626     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
627   }
628   PetscFunctionReturn(0);
629 }
630 
631 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
632 
633 #undef __FUNCT__
634 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
635 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
636 {
637   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
638   PetscErrorCode ierr;
639   PetscInt       nstash,reallocs;
640   InsertMode     addv;
641 
642   PetscFunctionBegin;
643   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
644 
645   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
646   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
647   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
648   mat->insertmode = addv; /* in case this processor had no cache */
649 
650   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
651   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
652   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
653   PetscFunctionReturn(0);
654 }
655 
656 #undef __FUNCT__
657 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
658 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
659 {
660   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
661   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
662   PetscErrorCode ierr;
663   PetscMPIInt    n;
664   PetscInt       i,j,rstart,ncols,flg;
665   PetscInt       *row,*col;
666   PetscBool      other_disassembled;
667   PetscScalar    *val;
668   InsertMode     addv = mat->insertmode;
669 
670   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
671 
672   PetscFunctionBegin;
673   if (!aij->donotstash && !mat->nooffprocentries) {
674     while (1) {
675       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
676       if (!flg) break;
677 
678       for (i=0; i<n; ) {
679         /* Now identify the consecutive vals belonging to the same row */
680         for (j=i,rstart=row[j]; j<n; j++) {
681           if (row[j] != rstart) break;
682         }
683         if (j < n) ncols = j-i;
684         else       ncols = n-i;
685         /* Now assemble all these values with a single function call */
686         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
687 
688         i = j;
689       }
690     }
691     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
692   }
693   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
694   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
695 
696   /* determine if any processor has disassembled; if so we must
697      also disassemble ourselves, in order that we may reassemble. */
698   /*
699      if the nonzero structure of submatrix B cannot change, then we know that
700      no processor disassembled, so we can skip this step
701   */
702   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
703     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
704     if (mat->was_assembled && !other_disassembled) {
705       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
706     }
707   }
708   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
709     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
710   }
711   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
712   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
714   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
715 
716   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
717 
718   aij->rowvalues = 0;
719 
720   /* used by MatAXPY() */
721   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
722   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
723 
724   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
725   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
731 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
732 {
733   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
734   PetscErrorCode ierr;
735 
736   PetscFunctionBegin;
737   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
738   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatZeroRows_MPIAIJ"
744 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
745 {
746   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
749   PetscInt          i,*owners = A->rmap->range;
750   PetscInt          *nprocs,j,idx,nsends,row;
751   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
752   PetscInt          *rvalues,count,base,slen,*source;
753   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
754   MPI_Comm          comm;
755   MPI_Request       *send_waits,*recv_waits;
756   MPI_Status        recv_status,*send_status;
757   const PetscScalar *xx;
758   PetscScalar       *bb;
759 #if defined(PETSC_DEBUG)
760   PetscBool found = PETSC_FALSE;
761 #endif
762 
763   PetscFunctionBegin;
764   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
765   /*  first count the number of contributors to each processor */
766   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
767   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
768   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
769   j    = 0;
770   for (i=0; i<N; i++) {
771     if (lastidx > (idx = rows[i])) j = 0;
772     lastidx = idx;
773     for (; j<size; j++) {
774       if (idx >= owners[j] && idx < owners[j+1]) {
775         nprocs[2*j]++;
776         nprocs[2*j+1] = 1;
777         owner[i]      = j;
778 #if defined(PETSC_DEBUG)
779         found = PETSC_TRUE;
780 #endif
781         break;
782       }
783     }
784 #if defined(PETSC_DEBUG)
785     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
786     found = PETSC_FALSE;
787 #endif
788   }
789   nsends = 0;
790   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
791 
792   if (A->nooffproczerorows) {
793     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
794     nrecvs = nsends;
795     nmax   = N;
796   } else {
797     /* inform other processors of number of messages and max length*/
798     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
799   }
800 
801   /* post receives:   */
802   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
803   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
804   for (i=0; i<nrecvs; i++) {
805     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
806   }
807 
808   /* do sends:
809       1) starts[i] gives the starting index in svalues for stuff going to
810          the ith processor
811   */
812   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
813   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
814   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
815 
816   starts[0] = 0;
817   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
818   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
819 
820   starts[0] = 0;
821   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
822   count = 0;
823   for (i=0; i<size; i++) {
824     if (nprocs[2*i+1]) {
825       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
826     }
827   }
828   ierr = PetscFree(starts);CHKERRQ(ierr);
829 
830   base = owners[rank];
831 
832   /*  wait on receives */
833   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
834   count = nrecvs; slen = 0;
835   while (count) {
836     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
837     /* unpack receives into our local space */
838     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
839 
840     source[imdex] = recv_status.MPI_SOURCE;
841     lens[imdex]   = n;
842     slen         += n;
843     count--;
844   }
845   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
846 
847   /* move the data into the send scatter */
848   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
849   count = 0;
850   for (i=0; i<nrecvs; i++) {
851     values = rvalues + i*nmax;
852     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
853   }
854   ierr = PetscFree(rvalues);CHKERRQ(ierr);
855   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
856   ierr = PetscFree(owner);CHKERRQ(ierr);
857   ierr = PetscFree(nprocs);CHKERRQ(ierr);
858 
859   /* fix right hand side if needed */
860   if (x && b) {
861     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
862     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
863     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
864     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
865     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
866   }
867   /*
868         Zero the required rows. If the "diagonal block" of the matrix
869      is square and the user wishes to set the diagonal, we use separate
870      code so that MatSetValues() is not called for each diagonal entry,
871      which would allocate new memory with many mallocs and slow things down.
872 
873   */
874   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
875   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
876   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
877     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
878   } else if (diag != 0.0) {
879     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
880     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
881     for (i = 0; i < slen; i++) {
882       row  = lrows[i] + rstart;
883       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
884     }
885     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
886     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887   } else {
888     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
889   }
890   ierr = PetscFree(lrows);CHKERRQ(ierr);
891 
892   /* wait on sends */
893   if (nsends) {
894     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
895     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
896     ierr = PetscFree(send_status);CHKERRQ(ierr);
897   }
898   ierr = PetscFree(send_waits);CHKERRQ(ierr);
899   ierr = PetscFree(svalues);CHKERRQ(ierr);
900   PetscFunctionReturn(0);
901 }
902 
903 #undef __FUNCT__
904 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
905 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
906 {
907   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
908   PetscErrorCode    ierr;
909   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
910   PetscInt          i,*owners = A->rmap->range;
911   PetscInt          *nprocs,j,idx,nsends;
912   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
913   PetscInt          *rvalues,count,base,slen,*source;
914   PetscInt          *lens,*lrows,*values,m;
915   MPI_Comm          comm;
916   MPI_Request       *send_waits,*recv_waits;
917   MPI_Status        recv_status,*send_status;
918   const PetscScalar *xx;
919   PetscScalar       *bb,*mask;
920   Vec               xmask,lmask;
921   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
922   const PetscInt    *aj, *ii,*ridx;
923   PetscScalar       *aa;
924 #if defined(PETSC_DEBUG)
925   PetscBool found = PETSC_FALSE;
926 #endif
927 
928   PetscFunctionBegin;
929   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
930   /*  first count the number of contributors to each processor */
931   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
932   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
933   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
934   j    = 0;
935   for (i=0; i<N; i++) {
936     if (lastidx > (idx = rows[i])) j = 0;
937     lastidx = idx;
938     for (; j<size; j++) {
939       if (idx >= owners[j] && idx < owners[j+1]) {
940         nprocs[2*j]++;
941         nprocs[2*j+1] = 1;
942         owner[i]      = j;
943 #if defined(PETSC_DEBUG)
944         found = PETSC_TRUE;
945 #endif
946         break;
947       }
948     }
949 #if defined(PETSC_DEBUG)
950     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
951     found = PETSC_FALSE;
952 #endif
953   }
954   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
955 
956   /* inform other processors of number of messages and max length*/
957   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
958 
959   /* post receives:   */
960   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
961   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
962   for (i=0; i<nrecvs; i++) {
963     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
964   }
965 
966   /* do sends:
967       1) starts[i] gives the starting index in svalues for stuff going to
968          the ith processor
969   */
970   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
971   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
972   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
973 
974   starts[0] = 0;
975   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
976   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
977 
978   starts[0] = 0;
979   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
980   count = 0;
981   for (i=0; i<size; i++) {
982     if (nprocs[2*i+1]) {
983       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
984     }
985   }
986   ierr = PetscFree(starts);CHKERRQ(ierr);
987 
988   base = owners[rank];
989 
990   /*  wait on receives */
991   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
992   count = nrecvs; slen = 0;
993   while (count) {
994     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
995     /* unpack receives into our local space */
996     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
997 
998     source[imdex] = recv_status.MPI_SOURCE;
999     lens[imdex]   = n;
1000     slen         += n;
1001     count--;
1002   }
1003   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1004 
1005   /* move the data into the send scatter */
1006   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1007   count = 0;
1008   for (i=0; i<nrecvs; i++) {
1009     values = rvalues + i*nmax;
1010     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1011   }
1012   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1013   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1014   ierr = PetscFree(owner);CHKERRQ(ierr);
1015   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1016   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1017 
1018   /* zero diagonal part of matrix */
1019   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1020 
1021   /* handle off diagonal part of matrix */
1022   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1023   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1024   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1025   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1026   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1027   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1030   if (x) {
1031     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1034     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1035   }
1036   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1037 
1038   /* remove zeroed rows of off diagonal matrix */
1039   ii = aij->i;
1040   for (i=0; i<slen; i++) {
1041     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1042   }
1043 
1044   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1045   if (aij->compressedrow.use) {
1046     m    = aij->compressedrow.nrows;
1047     ii   = aij->compressedrow.i;
1048     ridx = aij->compressedrow.rindex;
1049     for (i=0; i<m; i++) {
1050       n  = ii[i+1] - ii[i];
1051       aj = aij->j + ii[i];
1052       aa = aij->a + ii[i];
1053 
1054       for (j=0; j<n; j++) {
1055         if (PetscAbsScalar(mask[*aj])) {
1056           if (b) bb[*ridx] -= *aa*xx[*aj];
1057           *aa = 0.0;
1058         }
1059         aa++;
1060         aj++;
1061       }
1062       ridx++;
1063     }
1064   } else { /* do not use compressed row format */
1065     m = l->B->rmap->n;
1066     for (i=0; i<m; i++) {
1067       n  = ii[i+1] - ii[i];
1068       aj = aij->j + ii[i];
1069       aa = aij->a + ii[i];
1070       for (j=0; j<n; j++) {
1071         if (PetscAbsScalar(mask[*aj])) {
1072           if (b) bb[i] -= *aa*xx[*aj];
1073           *aa = 0.0;
1074         }
1075         aa++;
1076         aj++;
1077       }
1078     }
1079   }
1080   if (x) {
1081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1082     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1083   }
1084   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1085   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1086   ierr = PetscFree(lrows);CHKERRQ(ierr);
1087 
1088   /* wait on sends */
1089   if (nsends) {
1090     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1091     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1092     ierr = PetscFree(send_status);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1095   ierr = PetscFree(svalues);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatMult_MPIAIJ"
1101 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1102 {
1103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1104   PetscErrorCode ierr;
1105   PetscInt       nt;
1106 
1107   PetscFunctionBegin;
1108   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1109   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1110   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1111   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1112   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1113   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1119 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1120 {
1121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1122   PetscErrorCode ierr;
1123 
1124   PetscFunctionBegin;
1125   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1131 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1132 {
1133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1134   PetscErrorCode ierr;
1135 
1136   PetscFunctionBegin;
1137   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1138   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1139   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1140   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 #undef __FUNCT__
1145 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1146 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1147 {
1148   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1149   PetscErrorCode ierr;
1150   PetscBool      merged;
1151 
1152   PetscFunctionBegin;
1153   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1154   /* do nondiagonal part */
1155   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1156   if (!merged) {
1157     /* send it on its way */
1158     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159     /* do local part */
1160     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1161     /* receive remote parts: note this assumes the values are not actually */
1162     /* added in yy until the next line */
1163     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1164   } else {
1165     /* do local part */
1166     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1167     /* send it on its way */
1168     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1169     /* values actually were received in the Begin() but we need to call this nop */
1170     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1171   }
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1177 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1178 {
1179   MPI_Comm       comm;
1180   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1181   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1182   IS             Me,Notme;
1183   PetscErrorCode ierr;
1184   PetscInt       M,N,first,last,*notme,i;
1185   PetscMPIInt    size;
1186 
1187   PetscFunctionBegin;
1188   /* Easy test: symmetric diagonal block */
1189   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1190   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1191   if (!*f) PetscFunctionReturn(0);
1192   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1193   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1194   if (size == 1) PetscFunctionReturn(0);
1195 
1196   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1197   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1198   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1199   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1200   for (i=0; i<first; i++) notme[i] = i;
1201   for (i=last; i<M; i++) notme[i-last+first] = i;
1202   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1203   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1204   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1205   Aoff = Aoffs[0];
1206   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1207   Boff = Boffs[0];
1208   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1209   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1211   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1213   ierr = PetscFree(notme);CHKERRQ(ierr);
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1219 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1220 {
1221   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1222   PetscErrorCode ierr;
1223 
1224   PetscFunctionBegin;
1225   /* do nondiagonal part */
1226   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1227   /* send it on its way */
1228   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1229   /* do local part */
1230   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1231   /* receive remote parts */
1232   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 /*
1237   This only works correctly for square matrices where the subblock A->A is the
1238    diagonal block
1239 */
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1242 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1243 {
1244   PetscErrorCode ierr;
1245   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1246 
1247   PetscFunctionBegin;
1248   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1249   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1250   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatScale_MPIAIJ"
1256 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1257 {
1258   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1259   PetscErrorCode ierr;
1260 
1261   PetscFunctionBegin;
1262   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1263   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #undef __FUNCT__
1268 #define __FUNCT__ "MatDestroy_MPIAIJ"
1269 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1270 {
1271   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1272   PetscErrorCode ierr;
1273 
1274   PetscFunctionBegin;
1275 #if defined(PETSC_USE_LOG)
1276   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1277 #endif
1278   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1279   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1280   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1281   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1282 #if defined(PETSC_USE_CTABLE)
1283   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1284 #else
1285   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1286 #endif
1287   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1288   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1289   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1290   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1291   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1292   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1293 
1294   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",NULL);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1308 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1309 {
1310   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1311   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1312   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1313   PetscErrorCode ierr;
1314   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1315   int            fd;
1316   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1317   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1318   PetscScalar    *column_values;
1319   PetscInt       message_count,flowcontrolcount;
1320   FILE           *file;
1321 
1322   PetscFunctionBegin;
1323   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1324   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1325   nz   = A->nz + B->nz;
1326   if (!rank) {
1327     header[0] = MAT_FILE_CLASSID;
1328     header[1] = mat->rmap->N;
1329     header[2] = mat->cmap->N;
1330 
1331     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1332     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1333     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1334     /* get largest number of rows any processor has */
1335     rlen  = mat->rmap->n;
1336     range = mat->rmap->range;
1337     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1338   } else {
1339     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1340     rlen = mat->rmap->n;
1341   }
1342 
1343   /* load up the local row counts */
1344   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1345   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1346 
1347   /* store the row lengths to the file */
1348   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1349   if (!rank) {
1350     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1351     for (i=1; i<size; i++) {
1352       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1353       rlen = range[i+1] - range[i];
1354       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1355       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1356     }
1357     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1358   } else {
1359     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1360     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1361     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1362   }
1363   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1364 
1365   /* load up the local column indices */
1366   nzmax = nz; /* processor 0 needs space as large as the largest number of nonzeros on any processor */
1367   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1368   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1369   cnt   = 0;
1370   for (i=0; i<mat->rmap->n; i++) {
1371     for (j=B->i[i]; j<B->i[i+1]; j++) {
1372       if ((col = garray[B->j[j]]) > cstart) break;
1373       column_indices[cnt++] = col;
1374     }
1375     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1376     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1377   }
1378   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1379 
1380   /* store the column indices to the file */
1381   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1382   if (!rank) {
1383     MPI_Status status;
1384     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1385     for (i=1; i<size; i++) {
1386       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1387       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1388       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1389       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1390       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1391     }
1392     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1393   } else {
1394     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1395     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1396     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1398   }
1399   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1400 
1401   /* load up the local column values */
1402   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1403   cnt  = 0;
1404   for (i=0; i<mat->rmap->n; i++) {
1405     for (j=B->i[i]; j<B->i[i+1]; j++) {
1406       if (garray[B->j[j]] > cstart) break;
1407       column_values[cnt++] = B->a[j];
1408     }
1409     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1410     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1411   }
1412   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1413 
1414   /* store the column values to the file */
1415   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1416   if (!rank) {
1417     MPI_Status status;
1418     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1419     for (i=1; i<size; i++) {
1420       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1421       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1422       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1423       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1424       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1425     }
1426     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1427   } else {
1428     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1429     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1430     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1431     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1432   }
1433   ierr = PetscFree(column_values);CHKERRQ(ierr);
1434 
1435   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1436   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1437   PetscFunctionReturn(0);
1438 }
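
/*
   Illustrative usage sketch (not part of this implementation): writing an assembled parallel AIJ
   matrix to disk in PETSc binary format; MatView() on a binary viewer ends up in
   MatView_MPIAIJ_Binary() above. The matrix A and the file name "A.dat" are assumptions.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/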
1439 
1440 #include <petscdraw.h>
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1443 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1444 {
1445   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1446   PetscErrorCode    ierr;
1447   PetscMPIInt       rank = aij->rank,size = aij->size;
1448   PetscBool         isdraw,iascii,isbinary;
1449   PetscViewer       sviewer;
1450   PetscViewerFormat format;
1451 
1452   PetscFunctionBegin;
1453   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1456   if (iascii) {
1457     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1458     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1459       MatInfo   info;
1460       PetscBool inodes;
1461 
1462       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1463       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1464       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1465       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1466       if (!inodes) {
1467         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1468                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1469       } else {
1470         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1471                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1472       }
1473       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1474       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1475       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1476       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1477       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1478       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1480       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1481       PetscFunctionReturn(0);
1482     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1483       PetscInt inodecount,inodelimit,*inodes;
1484       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1485       if (inodes) {
1486         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1487       } else {
1488         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1489       }
1490       PetscFunctionReturn(0);
1491     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1492       PetscFunctionReturn(0);
1493     }
1494   } else if (isbinary) {
1495     if (size == 1) {
1496       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1497       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1498     } else {
1499       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1500     }
1501     PetscFunctionReturn(0);
1502   } else if (isdraw) {
1503     PetscDraw draw;
1504     PetscBool isnull;
1505     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1506     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1507   }
1508 
1509   if (size == 1) {
1510     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1511     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1512   } else {
1513     /* assemble the entire matrix onto the first processor */
1514     Mat        A;
1515     Mat_SeqAIJ *Aloc;
1516     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1517     MatScalar  *a;
1518 
1519     if (mat->rmap->N > 1024) {
1520       PetscBool flg = PETSC_FALSE;
1521 
1522       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1523       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1524     }
1525 
1526     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1527     if (!rank) {
1528       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1529     } else {
1530       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1531     }
1532     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1533     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1534     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1535     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1536     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1537 
1538     /* copy over the A part */
1539     Aloc = (Mat_SeqAIJ*)aij->A->data;
1540     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1541     row  = mat->rmap->rstart;
1542     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1543     for (i=0; i<m; i++) {
1544       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1545       row++;
1546       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1547     }
1548     aj = Aloc->j;
1549     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1550 
1551     /* copy over the B part */
1552     Aloc = (Mat_SeqAIJ*)aij->B->data;
1553     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1554     row  = mat->rmap->rstart;
1555     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1556     ct   = cols;
1557     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1558     for (i=0; i<m; i++) {
1559       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1560       row++;
1561       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1562     }
1563     ierr = PetscFree(ct);CHKERRQ(ierr);
1564     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1565     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     /*
1567        Every process has to participate in the MatView() call since the graphics waits are
1568        synchronized across all processors that share the PetscDraw object
1569     */
1570     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1571     if (!rank) {
1572       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1573       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1574       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1575       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1576     }
1577     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1578     ierr = MatDestroy(&A);CHKERRQ(ierr);
1579   }
1580   PetscFunctionReturn(0);
1581 }
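
/*
   Illustrative usage sketch (not part of this implementation): viewing an assumed assembled MPIAIJ
   matrix A in the ASCII and draw formats handled above.

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);   per-process nonzero information
     ierr = MatView(A,PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);     nonzero structure in an X window
*/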
1582 
1583 #undef __FUNCT__
1584 #define __FUNCT__ "MatView_MPIAIJ"
1585 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1586 {
1587   PetscErrorCode ierr;
1588   PetscBool      iascii,isdraw,issocket,isbinary;
1589 
1590   PetscFunctionBegin;
1591   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1595   if (iascii || isdraw || isbinary || issocket) {
1596     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1597   }
1598   PetscFunctionReturn(0);
1599 }
1600 
1601 #undef __FUNCT__
1602 #define __FUNCT__ "MatSOR_MPIAIJ"
1603 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1604 {
1605   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1606   PetscErrorCode ierr;
1607   Vec            bb1 = 0;
1608   PetscBool      hasop;
1609 
1610   PetscFunctionBegin;
1611   if (flag == SOR_APPLY_UPPER) {
1612     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1613     PetscFunctionReturn(0);
1614   }
1615 
1616   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1617     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1618   }
1619 
1620   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1621     if (flag & SOR_ZERO_INITIAL_GUESS) {
1622       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1623       its--;
1624     }
1625 
1626     while (its--) {
1627       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1628       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629 
1630       /* update rhs: bb1 = bb - B*x */
1631       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1632       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1633 
1634       /* local sweep */
1635       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1636     }
1637   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1638     if (flag & SOR_ZERO_INITIAL_GUESS) {
1639       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1640       its--;
1641     }
1642     while (its--) {
1643       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1644       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645 
1646       /* update rhs: bb1 = bb - B*x */
1647       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1648       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1649 
1650       /* local sweep */
1651       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1652     }
1653   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1654     if (flag & SOR_ZERO_INITIAL_GUESS) {
1655       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1656       its--;
1657     }
1658     while (its--) {
1659       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661 
1662       /* update rhs: bb1 = bb - B*x */
1663       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1664       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1665 
1666       /* local sweep */
1667       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1668     }
1669   } else if (flag & SOR_EISENSTAT) {
1670     Vec xx1;
1671 
1672     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1673     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1674 
1675     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1676     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     if (!mat->diag) {
1678       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1679       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1680     }
1681     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1682     if (hasop) {
1683       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1684     } else {
1685       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1686     }
1687     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1688 
1689     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1690 
1691     /* local sweep */
1692     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1693     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1694     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1695   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1696 
1697   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1698   PetscFunctionReturn(0);
1699 }
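
/*
   Illustrative usage sketch (not part of this implementation): applying a few processor-local
   symmetric SOR sweeps to an assumed assembled MPIAIJ matrix A with assumed vectors b and x; only
   the SOR_LOCAL_* variants and SOR_EISENSTAT are supported in parallel, as enforced above.

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);

   The same sweeps are typically reached through PCSOR, e.g. with options such as
   -pc_type sor -pc_sor_local_symmetric -pc_sor_its 2.
*/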
1700 
1701 #undef __FUNCT__
1702 #define __FUNCT__ "MatPermute_MPIAIJ"
1703 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1704 {
1705   Mat            aA,aB,Aperm;
1706   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1707   PetscScalar    *aa,*ba;
1708   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1709   PetscSF        rowsf,sf;
1710   IS             parcolp = NULL;
1711   PetscBool      done;
1712   PetscErrorCode ierr;
1713 
1714   PetscFunctionBegin;
1715   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1716   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1717   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1718   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1719 
1720   /* Invert row permutation to find out where my rows should go */
1721   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1722   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1723   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1724   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1725   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1726   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1727 
1728   /* Invert column permutation to find out where my columns should go */
1729   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1730   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1731   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1732   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1733   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1734   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1735   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1736 
1737   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1738   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1739   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1740 
1741   /* Find out where my gcols should go */
1742   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1743   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1744   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1745   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1746   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1747   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1748   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1749   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1750 
1751   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1752   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1753   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1755   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1756   for (i=0; i<m; i++) {
1757     PetscInt row = rdest[i],rowner;
1758     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1759     for (j=ai[i]; j<ai[i+1]; j++) {
1760       PetscInt cowner,col = cdest[aj[j]];
1761       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1762       if (rowner == cowner) dnnz[i]++;
1763       else onnz[i]++;
1764     }
1765     for (j=bi[i]; j<bi[i+1]; j++) {
1766       PetscInt cowner,col = gcdest[bj[j]];
1767       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1768       if (rowner == cowner) dnnz[i]++;
1769       else onnz[i]++;
1770     }
1771   }
1772   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1773   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1774   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1775   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1778   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1779 
1780   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1781   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1782   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1783   for (i=0; i<m; i++) {
1784     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1785     PetscInt rowlen;
1786     rowlen = ai[i+1] - ai[i];
1787     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1788     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1789     rowlen = bi[i+1] - bi[i];
1790     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1791     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1792   }
1793   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1794   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1796   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1797   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1798   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1799   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1800   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1801   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1802   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1803   *B = Aperm;
1804   PetscFunctionReturn(0);
1805 }
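
/*
   Illustrative usage sketch (not part of this implementation): permuting an assumed parallel AIJ
   matrix A with row/column index sets; MatGetOrdering() with the natural ordering is just one way
   to obtain a pair of conforming permutation ISs.

     IS  rowperm,colperm;
     Mat Aperm;
     ierr = MatGetOrdering(A,MATORDERINGNATURAL,&rowperm,&colperm);CHKERRQ(ierr);
     ierr = MatPermute(A,rowperm,colperm,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(&rowperm);CHKERRQ(ierr);
     ierr = ISDestroy(&colperm);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/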
1806 
1807 #undef __FUNCT__
1808 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1809 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1810 {
1811   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1812   Mat            A    = mat->A,B = mat->B;
1813   PetscErrorCode ierr;
1814   PetscReal      isend[5],irecv[5];
1815 
1816   PetscFunctionBegin;
1817   info->block_size = 1.0;
1818   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1819 
1820   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1821   isend[3] = info->memory;  isend[4] = info->mallocs;
1822 
1823   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1824 
1825   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1826   isend[3] += info->memory;  isend[4] += info->mallocs;
1827   if (flag == MAT_LOCAL) {
1828     info->nz_used      = isend[0];
1829     info->nz_allocated = isend[1];
1830     info->nz_unneeded  = isend[2];
1831     info->memory       = isend[3];
1832     info->mallocs      = isend[4];
1833   } else if (flag == MAT_GLOBAL_MAX) {
1834     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1835 
1836     info->nz_used      = irecv[0];
1837     info->nz_allocated = irecv[1];
1838     info->nz_unneeded  = irecv[2];
1839     info->memory       = irecv[3];
1840     info->mallocs      = irecv[4];
1841   } else if (flag == MAT_GLOBAL_SUM) {
1842     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1843 
1844     info->nz_used      = irecv[0];
1845     info->nz_allocated = irecv[1];
1846     info->nz_unneeded  = irecv[2];
1847     info->memory       = irecv[3];
1848     info->mallocs      = irecv[4];
1849   }
1850   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1851   info->fill_ratio_needed = 0;
1852   info->factor_mallocs    = 0;
1853   PetscFunctionReturn(0);
1854 }
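
/*
   Illustrative usage sketch (not part of this implementation): querying nonzero and memory
   statistics for an assumed assembled matrix A; MAT_GLOBAL_SUM and MAT_GLOBAL_MAX trigger the
   reductions performed above.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/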
1855 
1856 #undef __FUNCT__
1857 #define __FUNCT__ "MatSetOption_MPIAIJ"
1858 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1859 {
1860   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1861   PetscErrorCode ierr;
1862 
1863   PetscFunctionBegin;
1864   switch (op) {
1865   case MAT_NEW_NONZERO_LOCATIONS:
1866   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1867   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1868   case MAT_KEEP_NONZERO_PATTERN:
1869   case MAT_NEW_NONZERO_LOCATION_ERR:
1870   case MAT_USE_INODES:
1871   case MAT_IGNORE_ZERO_ENTRIES:
1872     MatCheckPreallocated(A,1);
1873     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1874     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1875     break;
1876   case MAT_ROW_ORIENTED:
1877     a->roworiented = flg;
1878 
1879     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1880     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1881     break;
1882   case MAT_NEW_DIAGONALS:
1883     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1884     break;
1885   case MAT_IGNORE_OFF_PROC_ENTRIES:
1886     a->donotstash = flg;
1887     break;
1888   case MAT_SPD:
1889     A->spd_set = PETSC_TRUE;
1890     A->spd     = flg;
1891     if (flg) {
1892       A->symmetric                  = PETSC_TRUE;
1893       A->structurally_symmetric     = PETSC_TRUE;
1894       A->symmetric_set              = PETSC_TRUE;
1895       A->structurally_symmetric_set = PETSC_TRUE;
1896     }
1897     break;
1898   case MAT_SYMMETRIC:
1899     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1900     break;
1901   case MAT_STRUCTURALLY_SYMMETRIC:
1902     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1903     break;
1904   case MAT_HERMITIAN:
1905     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1906     break;
1907   case MAT_SYMMETRY_ETERNAL:
1908     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1909     break;
1910   default:
1911     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1912   }
1913   PetscFunctionReturn(0);
1914 }
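
/*
   Illustrative usage sketch (not part of this implementation): typical options handled by the
   routine above for an assumed MPIAIJ matrix A; most of them are simply forwarded to the diagonal
   (A) and off-diagonal (B) blocks.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);     drop off-process entries instead of stashing them
     ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);  error out rather than malloc on a new nonzero
*/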
1915 
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatGetRow_MPIAIJ"
1918 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1919 {
1920   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1921   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1922   PetscErrorCode ierr;
1923   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1924   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1925   PetscInt       *cmap,*idx_p;
1926 
1927   PetscFunctionBegin;
1928   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1929   mat->getrowactive = PETSC_TRUE;
1930 
1931   if (!mat->rowvalues && (idx || v)) {
1932     /*
1933         allocate enough space to hold information from the longest row.
1934     */
1935     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1936     PetscInt   max = 1,tmp;
1937     for (i=0; i<matin->rmap->n; i++) {
1938       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1939       if (max < tmp) max = tmp;
1940     }
1941     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1942   }
1943 
1944   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1945   lrow = row - rstart;
1946 
1947   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1948   if (!v)   {pvA = 0; pvB = 0;}
1949   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1950   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1951   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1952   nztot = nzA + nzB;
1953 
1954   cmap = mat->garray;
1955   if (v  || idx) {
1956     if (nztot) {
1957       /* Sort by increasing column numbers, assuming A and B already sorted */
1958       PetscInt imark = -1;
1959       if (v) {
1960         *v = v_p = mat->rowvalues;
1961         for (i=0; i<nzB; i++) {
1962           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1963           else break;
1964         }
1965         imark = i;
1966         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1967         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1968       }
1969       if (idx) {
1970         *idx = idx_p = mat->rowindices;
1971         if (imark > -1) {
1972           for (i=0; i<imark; i++) {
1973             idx_p[i] = cmap[cworkB[i]];
1974           }
1975         } else {
1976           for (i=0; i<nzB; i++) {
1977             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1978             else break;
1979           }
1980           imark = i;
1981         }
1982         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1983         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1984       }
1985     } else {
1986       if (idx) *idx = 0;
1987       if (v)   *v   = 0;
1988     }
1989   }
1990   *nz  = nztot;
1991   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1992   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1993   PetscFunctionReturn(0);
1994 }
1995 
1996 #undef __FUNCT__
1997 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1998 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1999 {
2000   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2001 
2002   PetscFunctionBegin;
2003   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2004   aij->getrowactive = PETSC_FALSE;
2005   PetscFunctionReturn(0);
2006 }
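
/*
   Illustrative usage sketch (not part of this implementation): iterating over the locally owned
   rows of an assumed MPIAIJ matrix A with MatGetRow()/MatRestoreRow(), which dispatch to the two
   routines above; only local rows may be requested, as enforced in MatGetRow_MPIAIJ().

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       (use ncols, cols[], vals[] here)
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/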
2007 
2008 #undef __FUNCT__
2009 #define __FUNCT__ "MatNorm_MPIAIJ"
2010 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2011 {
2012   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2013   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2014   PetscErrorCode ierr;
2015   PetscInt       i,j,cstart = mat->cmap->rstart;
2016   PetscReal      sum = 0.0;
2017   MatScalar      *v;
2018 
2019   PetscFunctionBegin;
2020   if (aij->size == 1) {
2021     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2022   } else {
2023     if (type == NORM_FROBENIUS) {
2024       v = amat->a;
2025       for (i=0; i<amat->nz; i++) {
2026         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2027       }
2028       v = bmat->a;
2029       for (i=0; i<bmat->nz; i++) {
2030         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2031       }
2032       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2033       *norm = PetscSqrtReal(*norm);
2034     } else if (type == NORM_1) { /* max column norm */
2035       PetscReal *tmp,*tmp2;
2036       PetscInt  *jj,*garray = aij->garray;
2037       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2038       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2039       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2040       *norm = 0.0;
2041       v     = amat->a; jj = amat->j;
2042       for (j=0; j<amat->nz; j++) {
2043         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2044       }
2045       v = bmat->a; jj = bmat->j;
2046       for (j=0; j<bmat->nz; j++) {
2047         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2048       }
2049       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2050       for (j=0; j<mat->cmap->N; j++) {
2051         if (tmp2[j] > *norm) *norm = tmp2[j];
2052       }
2053       ierr = PetscFree(tmp);CHKERRQ(ierr);
2054       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2055     } else if (type == NORM_INFINITY) { /* max row norm */
2056       PetscReal ntemp = 0.0;
2057       for (j=0; j<aij->A->rmap->n; j++) {
2058         v   = amat->a + amat->i[j];
2059         sum = 0.0;
2060         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2061           sum += PetscAbsScalar(*v); v++;
2062         }
2063         v = bmat->a + bmat->i[j];
2064         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2065           sum += PetscAbsScalar(*v); v++;
2066         }
2067         if (sum > ntemp) ntemp = sum;
2068       }
2069       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2070     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for the two norm");
2071   }
2072   PetscFunctionReturn(0);
2073 }
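
/*
   Illustrative usage sketch (not part of this implementation): the three norms supported above for
   an assumed assembled matrix A.

     PetscReal nrm1,nrmf,nrminf;
     ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);            largest column sum
     ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrminf);CHKERRQ(ierr);   largest row sum
*/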
2074 
2075 #undef __FUNCT__
2076 #define __FUNCT__ "MatTranspose_MPIAIJ"
2077 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2078 {
2079   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2080   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2081   PetscErrorCode ierr;
2082   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2083   PetscInt       cstart = A->cmap->rstart,ncol;
2084   Mat            B;
2085   MatScalar      *array;
2086 
2087   PetscFunctionBegin;
2088   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2089 
2090   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2091   ai = Aloc->i; aj = Aloc->j;
2092   bi = Bloc->i; bj = Bloc->j;
2093   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2094     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2095     PetscSFNode          *oloc;
2096     PETSC_UNUSED PetscSF sf;
2097 
2098     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2099     /* compute d_nnz for preallocation */
2100     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2101     for (i=0; i<ai[ma]; i++) {
2102       d_nnz[aj[i]]++;
2103       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2104     }
2105     /* compute local off-diagonal contributions */
2106     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2107     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2108     /* map those to global */
2109     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2110     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2111     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2112     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2113     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2114     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2115     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2116 
2117     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2118     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2119     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2120     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2121     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2122     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2123   } else {
2124     B    = *matout;
2125     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2126     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2127   }
2128 
2129   /* copy over the A part */
2130   array = Aloc->a;
2131   row   = A->rmap->rstart;
2132   for (i=0; i<ma; i++) {
2133     ncol = ai[i+1]-ai[i];
2134     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2135     row++;
2136     array += ncol; aj += ncol;
2137   }
2138   aj = Aloc->j;
2139   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col index */
2140 
2141   /* copy over the B part */
2142   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2143   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2144   array = Bloc->a;
2145   row   = A->rmap->rstart;
2146   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2147   cols_tmp = cols;
2148   for (i=0; i<mb; i++) {
2149     ncol = bi[i+1]-bi[i];
2150     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2151     row++;
2152     array += ncol; cols_tmp += ncol;
2153   }
2154   ierr = PetscFree(cols);CHKERRQ(ierr);
2155 
2156   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2157   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2159     *matout = B;
2160   } else {
2161     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2162   }
2163   PetscFunctionReturn(0);
2164 }
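
/*
   Illustrative usage sketch (not part of this implementation): out-of-place and in-place transposes
   of an assumed MPIAIJ matrix A, both handled by the routine above; the in-place form requires a
   square matrix, as checked there.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);   in place, A must be square
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/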
2165 
2166 #undef __FUNCT__
2167 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2168 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2169 {
2170   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2171   Mat            a    = aij->A,b = aij->B;
2172   PetscErrorCode ierr;
2173   PetscInt       s1,s2,s3;
2174 
2175   PetscFunctionBegin;
2176   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2177   if (rr) {
2178     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2179     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2180     /* Overlap communication with computation. */
2181     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2182   }
2183   if (ll) {
2184     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2185     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2186     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2187   }
2188   /* scale the diagonal block */
2189   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2190 
2191   if (rr) {
2192     /* Do a scatter end and then right scale the off-diagonal block */
2193     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2194     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2195   }
2196   PetscFunctionReturn(0);
2197 }
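
/*
   Illustrative usage sketch (not part of this implementation): forming diag(l)*A*diag(r) in place
   for an assumed MPIAIJ matrix A; the left vector must conform with the row layout and the right
   vector with the column layout, as checked above.

     Vec l,r;
     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);
*/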
2198 
2199 #undef __FUNCT__
2200 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2201 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2202 {
2203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2204   PetscErrorCode ierr;
2205 
2206   PetscFunctionBegin;
2207   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2208   PetscFunctionReturn(0);
2209 }
2210 
2211 #undef __FUNCT__
2212 #define __FUNCT__ "MatEqual_MPIAIJ"
2213 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2214 {
2215   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2216   Mat            a,b,c,d;
2217   PetscBool      flg;
2218   PetscErrorCode ierr;
2219 
2220   PetscFunctionBegin;
2221   a = matA->A; b = matA->B;
2222   c = matB->A; d = matB->B;
2223 
2224   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2225   if (flg) {
2226     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2227   }
2228   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2229   PetscFunctionReturn(0);
2230 }
2231 
2232 #undef __FUNCT__
2233 #define __FUNCT__ "MatCopy_MPIAIJ"
2234 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2235 {
2236   PetscErrorCode ierr;
2237   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2238   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2239 
2240   PetscFunctionBegin;
2241   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2242   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2243     /* Because of the column compression in the off-processor part of the matrix a->B,
2244        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2245        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2246        could be provided by first uncompressing the a->B matrices and then copying the
2247        submatrices */
2248     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2249   } else {
2250     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2251     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2252   }
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatSetUp_MPIAIJ"
2258 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2259 {
2260   PetscErrorCode ierr;
2261 
2262   PetscFunctionBegin;
2263   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2264   PetscFunctionReturn(0);
2265 }
2266 
2267 #undef __FUNCT__
2268 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2269 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2270 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2271 {
2272   PetscInt       i,m=Y->rmap->N;
2273   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2274   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2275   const PetscInt *xi = x->i,*yi = y->i;
2276 
2277   PetscFunctionBegin;
2278   /* Set the number of nonzeros in the new matrix */
2279   for (i=0; i<m; i++) {
2280     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2281     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2282     nnz[i] = 0;
2283     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2284       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2285       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2286       nnz[i]++;
2287     }
2288     for (; k<nzy; k++) nnz[i]++;
2289   }
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 #undef __FUNCT__
2294 #define __FUNCT__ "MatAXPY_MPIAIJ"
2295 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2296 {
2297   PetscErrorCode ierr;
2298   PetscInt       i;
2299   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2300   PetscBLASInt   bnz,one=1;
2301   Mat_SeqAIJ     *x,*y;
2302 
2303   PetscFunctionBegin;
2304   if (str == SAME_NONZERO_PATTERN) {
2305     PetscScalar alpha = a;
2306     x    = (Mat_SeqAIJ*)xx->A->data;
2307     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2308     y    = (Mat_SeqAIJ*)yy->A->data;
2309     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2310     x    = (Mat_SeqAIJ*)xx->B->data;
2311     y    = (Mat_SeqAIJ*)yy->B->data;
2312     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2313     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2314   } else if (str == SUBSET_NONZERO_PATTERN) {
2315     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2316 
2317     x = (Mat_SeqAIJ*)xx->B->data;
2318     y = (Mat_SeqAIJ*)yy->B->data;
2319     if (y->xtoy && y->XtoY != xx->B) {
2320       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2321       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2322     }
2323     if (!y->xtoy) { /* get xtoy */
2324       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2325       y->XtoY = xx->B;
2326       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2327     }
2328     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2329   } else {
2330     Mat      B;
2331     PetscInt *nnz_d,*nnz_o;
2332     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2333     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2334     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2335     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2336     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2337     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2338     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2339     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2340     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2341     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2342     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2343     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2344     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2345     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2346   }
2347   PetscFunctionReturn(0);
2348 }
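
/*
   Illustrative usage sketch (not part of this implementation): Y <- Y + a*X for two assumed
   parallel AIJ matrices. The structure flag selects one of the three branches above:
   SAME_NONZERO_PATTERN takes the fast BLAS axpy path, SUBSET_NONZERO_PATTERN maps X into Y, and
   DIFFERENT_NONZERO_PATTERN rebuilds Y with merged preallocation.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/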
2349 
2350 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2351 
2352 #undef __FUNCT__
2353 #define __FUNCT__ "MatConjugate_MPIAIJ"
2354 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2355 {
2356 #if defined(PETSC_USE_COMPLEX)
2357   PetscErrorCode ierr;
2358   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2359 
2360   PetscFunctionBegin;
2361   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2362   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2363 #else
2364   PetscFunctionBegin;
2365 #endif
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatRealPart_MPIAIJ"
2371 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2372 {
2373   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2374   PetscErrorCode ierr;
2375 
2376   PetscFunctionBegin;
2377   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2378   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2379   PetscFunctionReturn(0);
2380 }
2381 
2382 #undef __FUNCT__
2383 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2384 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2385 {
2386   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2387   PetscErrorCode ierr;
2388 
2389   PetscFunctionBegin;
2390   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2391   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 #if defined(PETSC_HAVE_PBGL)
2396 
2397 #include <boost/parallel/mpi/bsp_process_group.hpp>
2398 #include <boost/graph/distributed/ilu_default_graph.hpp>
2399 #include <boost/graph/distributed/ilu_0_block.hpp>
2400 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2401 #include <boost/graph/distributed/petsc/interface.hpp>
2402 #include <boost/multi_array.hpp>
2403 #include <boost/parallel/distributed_property_map.hpp>
2404 
2405 #undef __FUNCT__
2406 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2407 /*
2408   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2409 */
2410 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2411 {
2412   namespace petsc = boost::distributed::petsc;
2413 
2414   namespace graph_dist = boost::graph::distributed;
2415   using boost::graph::distributed::ilu_default::process_group_type;
2416   using boost::graph::ilu_permuted;
2417 
2418   PetscBool      row_identity, col_identity;
2419   PetscContainer c;
2420   PetscInt       m, n, M, N;
2421   PetscErrorCode ierr;
2422 
2423   PetscFunctionBegin;
2424   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2425   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2426   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2427   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2428 
2429   process_group_type pg;
2430   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2431   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2432   lgraph_type& level_graph = *lgraph_p;
2433   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2434 
2435   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2436   ilu_permuted(level_graph);
2437 
2438   /* put together the new matrix */
2439   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2440   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2441   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2442   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2443   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2444   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2445   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2446   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447 
2448   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2449   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2450   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2451   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 #undef __FUNCT__
2456 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2457 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2458 {
2459   PetscFunctionBegin;
2460   PetscFunctionReturn(0);
2461 }
2462 
2463 #undef __FUNCT__
2464 #define __FUNCT__ "MatSolve_MPIAIJ"
2465 /*
2466   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2467 */
2468 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2469 {
2470   namespace graph_dist = boost::graph::distributed;
2471 
2472   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2473   lgraph_type    *lgraph_p;
2474   PetscContainer c;
2475   PetscErrorCode ierr;
2476 
2477   PetscFunctionBegin;
2478   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2479   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2480   ierr = VecCopy(b, x);CHKERRQ(ierr);
2481 
2482   PetscScalar *array_x;
2483   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2484   PetscInt sx;
2485   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2486 
2487   PetscScalar *array_b;
2488   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2489   PetscInt sb;
2490   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2491 
2492   lgraph_type& level_graph = *lgraph_p;
2493   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2494 
2495   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2496   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2497   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2498 
2499   typedef boost::iterator_property_map<array_ref_type::iterator,
2500                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2501   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2502   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2503 
2504   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2505   PetscFunctionReturn(0);
2506 }
2507 #endif
2508 
2509 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2510   PetscInt    nzlocal,nsends,nrecvs;
2511   PetscMPIInt *send_rank,*recv_rank;
2512   PetscInt    *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2513   PetscScalar *sbuf_a,**rbuf_a;
2514   PetscErrorCode (*Destroy)(Mat);
2515 } Mat_Redundant;
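
/*
   Illustrative sketch (not part of this implementation): the compose/query pattern by which a
   Mat_Redundant is attached to a matrix C so the routines below can retrieve and later destroy it;
   the names mirror the surrounding code, but the actual creation path in this file may differ.

     PetscContainer container;
     Mat_Redundant  *redund;
     ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr);
     ierr = PetscContainerCreate(PetscObjectComm((PetscObject)C),&container);CHKERRQ(ierr);
     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
*/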
2516 
2517 #undef __FUNCT__
2518 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2519 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2520 {
2521   PetscErrorCode ierr;
2522   Mat_Redundant  *redund=(Mat_Redundant*)ptr;
2523   PetscInt       i;
2524 
2525   PetscFunctionBegin;
2526   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2527   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2528   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2529   for (i=0; i<redund->nrecvs; i++) {
2530     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2531     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2532   }
2533   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2534   ierr = PetscFree(redund);CHKERRQ(ierr);
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 #undef __FUNCT__
2539 #define __FUNCT__ "MatDestroy_MatRedundant"
2540 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2541 {
2542   PetscErrorCode ierr;
2543   PetscContainer container;
2544   Mat_Redundant  *redund=NULL;
2545 
2546   PetscFunctionBegin;
2547   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2548   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2549   ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2550 
2551   A->ops->destroy = redund->Destroy;
2552 
2553   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2554   if (A->ops->destroy) {
2555     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2556   }
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 #undef __FUNCT__
2561 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2562 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2563 {
2564   PetscMPIInt    rank,size;
2565   MPI_Comm       comm;
2566   PetscErrorCode ierr;
2567   PetscInt       nsends    = 0,nrecvs=0,i,rownz_max=0;
2568   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL;
2569   PetscInt       *rowrange = mat->rmap->range;
2570   Mat_MPIAIJ     *aij      = (Mat_MPIAIJ*)mat->data;
2571   Mat            A         = aij->A,B=aij->B,C=*matredundant;
2572   Mat_SeqAIJ     *a        = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2573   PetscScalar    *sbuf_a;
2574   PetscInt       nzlocal=a->nz+b->nz;
2575   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2576   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2577   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2578   MatScalar      *aworkA,*aworkB;
2579   PetscScalar    *vals;
2580   PetscMPIInt    tag1,tag2,tag3,imdex;
2581   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2582   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2583   MPI_Status     recv_status,*send_status;
2584   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2585   PetscInt       **rbuf_j=NULL;
2586   PetscScalar    **rbuf_a=NULL;
2587   Mat_Redundant  *redund =NULL;
2588   PetscContainer container;
2589 
2590   PetscFunctionBegin;
2591   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2592   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2593   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2594 
2595   if (reuse == MAT_REUSE_MATRIX) {
2596     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2597     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2598     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2599     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2600     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2601     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2602     ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2603     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2604 
2605     nsends    = redund->nsends;
2606     nrecvs    = redund->nrecvs;
2607     send_rank = redund->send_rank;
2608     recv_rank = redund->recv_rank;
2609     sbuf_nz   = redund->sbuf_nz;
2610     rbuf_nz   = redund->rbuf_nz;
2611     sbuf_j    = redund->sbuf_j;
2612     sbuf_a    = redund->sbuf_a;
2613     rbuf_j    = redund->rbuf_j;
2614     rbuf_a    = redund->rbuf_a;
2615   }
2616 
2617   if (reuse == MAT_INITIAL_MATRIX) {
2618     PetscMPIInt subrank,subsize;
2619     PetscInt    nleftover,np_subcomm;
2620     /* get the destination processors' id send_rank, nsends and nrecvs */
2621     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2622     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2623     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2624 
2625     np_subcomm = size/nsubcomm;
2626     nleftover  = size - nsubcomm*np_subcomm;
2627 
2628     nsends = 0; nrecvs = 0;
2629     for (i=0; i<size; i++) { /* i=rank*/
2630       if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */
2631         send_rank[nsends]   = i; nsends++;
2632         recv_rank[nrecvs++] = i;
2633       }
2634     }
2635     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2636       i = size-nleftover-1;
2637       j = 0;
2638       while (j < nsubcomm - nleftover) {
2639         send_rank[nsends++] = i;
2640         i--; j++;
2641       }
2642     }
2643 
2644     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2645       for (i=0; i<nleftover; i++) {
2646         recv_rank[nrecvs++] = size-nleftover+i;
2647       }
2648     }
2649 
2650     /* allocate sbuf_j, sbuf_a */
2651     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2652     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2653     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2654   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2655 
2656   /* copy mat's local entries into the buffers */
2657   if (reuse == MAT_INITIAL_MATRIX) {
2658     rownz_max = 0;
2659     rptr      = sbuf_j;
2660     cols      = sbuf_j + rend-rstart + 1;
2661     vals      = sbuf_a;
2662     rptr[0]   = 0;
2663     for (i=0; i<rend-rstart; i++) {
2664       row    = i + rstart;
2665       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2666       ncols  = nzA + nzB;
2667       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2668       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2669       /* load the column indices for this row into cols */
2670       lwrite = 0;
2671       for (l=0; l<nzB; l++) {
2672         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2673           vals[lwrite]   = aworkB[l];
2674           cols[lwrite++] = ctmp;
2675         }
2676       }
2677       for (l=0; l<nzA; l++) {
2678         vals[lwrite]   = aworkA[l];
2679         cols[lwrite++] = cstart + cworkA[l];
2680       }
2681       for (l=0; l<nzB; l++) {
2682         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2683           vals[lwrite]   = aworkB[l];
2684           cols[lwrite++] = ctmp;
2685         }
2686       }
2687       vals     += ncols;
2688       cols     += ncols;
2689       rptr[i+1] = rptr[i] + ncols;
2690       if (rownz_max < ncols) rownz_max = ncols;
2691     }
2692     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2693   } else { /* only copy matrix values into sbuf_a */
2694     rptr    = sbuf_j;
2695     vals    = sbuf_a;
2696     rptr[0] = 0;
2697     for (i=0; i<rend-rstart; i++) {
2698       row    = i + rstart;
2699       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2700       ncols  = nzA + nzB;
2701       cworkB = b->j + b->i[i];
2702       aworkA = a->a + a->i[i];
2703       aworkB = b->a + b->i[i];
2704       lwrite = 0;
2705       for (l=0; l<nzB; l++) {
2706         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2707       }
2708       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2709       for (l=0; l<nzB; l++) {
2710         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2711       }
2712       vals     += ncols;
2713       rptr[i+1] = rptr[i] + ncols;
2714     }
2715   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2716 
2717   /* send nzlocal to others, and recv other's nzlocal */
2718   /*--------------------------------------------------*/
2719   if (reuse == MAT_INITIAL_MATRIX) {
2720     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2721 
2722     s_waits2 = s_waits3 + nsends;
2723     s_waits1 = s_waits2 + nsends;
2724     r_waits1 = s_waits1 + nsends;
2725     r_waits2 = r_waits1 + nrecvs;
2726     r_waits3 = r_waits2 + nrecvs;
2727   } else {
2728     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2729 
2730     r_waits3 = s_waits3 + nsends;
2731   }
2732 
2733   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2734   if (reuse == MAT_INITIAL_MATRIX) {
2735     /* get new tags to keep the communication clean */
2736     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2737     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2738     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2739 
2740     /* post receives of other's nzlocal */
2741     for (i=0; i<nrecvs; i++) {
2742       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2743     }
2744     /* send nzlocal to others */
2745     for (i=0; i<nsends; i++) {
2746       sbuf_nz[i] = nzlocal;
2747       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2748     }
2749     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2750     count = nrecvs;
2751     while (count) {
2752       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2753 
2754       recv_rank[imdex] = recv_status.MPI_SOURCE;
2755       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2756       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2757 
2758       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2759 
2760       rbuf_nz[imdex] += i + 2;
2761 
2762       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2763       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2764       count--;
2765     }
2766     /* wait on sends of nzlocal */
2767     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2768     /* send mat->i,j to others, and recv from others */
2769     /*------------------------------------------------*/
2770     for (i=0; i<nsends; i++) {
2771       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2772       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2773     }
2774     /* wait on receives of mat->i,j */
2775     /*------------------------------*/
2776     count = nrecvs;
2777     while (count) {
2778       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2779       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2780       count--;
2781     }
2782     /* wait on sends of mat->i,j */
2783     /*---------------------------*/
2784     if (nsends) {
2785       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2786     }
2787   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2788 
2789   /* post receives, send and receive mat->a */
2790   /*----------------------------------------*/
2791   for (imdex=0; imdex<nrecvs; imdex++) {
2792     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2793   }
2794   for (i=0; i<nsends; i++) {
2795     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2796   }
2797   count = nrecvs;
2798   while (count) {
2799     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2800     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2801     count--;
2802   }
2803   if (nsends) {
2804     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2805   }
2806 
2807   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2808 
2809   /* create redundant matrix */
2810   /*-------------------------*/
2811   if (reuse == MAT_INITIAL_MATRIX) {
2812     /* compute rownz_max for preallocation */
2813     for (imdex=0; imdex<nrecvs; imdex++) {
2814       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2815       rptr = rbuf_j[imdex];
2816       for (i=0; i<j; i++) {
2817         ncols = rptr[i+1] - rptr[i];
2818         if (rownz_max < ncols) rownz_max = ncols;
2819       }
2820     }
2821 
2822     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2823     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2824     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2825     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2826     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2827     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2828   } else {
2829     C = *matredundant;
2830   }
2831 
2832   /* insert local matrix entries */
2833   rptr = sbuf_j;
2834   cols = sbuf_j + rend-rstart + 1;
2835   vals = sbuf_a;
2836   for (i=0; i<rend-rstart; i++) {
2837     row   = i + rstart;
2838     ncols = rptr[i+1] - rptr[i];
2839     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2840     vals += ncols;
2841     cols += ncols;
2842   }
2843   /* insert received matrix entries */
2844   for (imdex=0; imdex<nrecvs; imdex++) {
2845     rstart = rowrange[recv_rank[imdex]];
2846     rend   = rowrange[recv_rank[imdex]+1];
2847     rptr   = rbuf_j[imdex];
2848     cols   = rbuf_j[imdex] + rend-rstart + 1;
2849     vals   = rbuf_a[imdex];
2850     for (i=0; i<rend-rstart; i++) {
2851       row   = i + rstart;
2852       ncols = rptr[i+1] - rptr[i];
2853       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2854       vals += ncols;
2855       cols += ncols;
2856     }
2857   }
2858   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2859   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2860   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2861   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2862   if (reuse == MAT_INITIAL_MATRIX) {
2863     PetscContainer container;
2864     *matredundant = C;
2865     /* create a supporting struct and attach it to C for reuse */
2866     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2867     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2868     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2869     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2870     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2871     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2872 
2873     redund->nzlocal   = nzlocal;
2874     redund->nsends    = nsends;
2875     redund->nrecvs    = nrecvs;
2876     redund->send_rank = send_rank;
2877     redund->recv_rank = recv_rank;
2878     redund->sbuf_nz   = sbuf_nz;
2879     redund->rbuf_nz   = rbuf_nz;
2880     redund->sbuf_j    = sbuf_j;
2881     redund->sbuf_a    = sbuf_a;
2882     redund->rbuf_j    = rbuf_j;
2883     redund->rbuf_a    = rbuf_a;
2884 
2885     redund->Destroy = C->ops->destroy;
2886     C->ops->destroy = MatDestroy_MatRedundant;
2887   }
2888   PetscFunctionReturn(0);
2889 }
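
/*
   A minimal usage sketch for the routine above, reached through the public
   MatGetRedundantMatrix() with the same arguments (the sub-communicator setup is assumed
   to have been done beforehand, e.g. with PetscSubcommCreate()):

     Mat redundant;
     MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&redundant);
     // ... change the numerical values of mat, keeping its nonzero pattern ...
     MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&redundant);
     MatDestroy(&redundant);
*/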
2890 
2891 #undef __FUNCT__
2892 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2893 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2894 {
2895   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2896   PetscErrorCode ierr;
2897   PetscInt       i,*idxb = 0;
2898   PetscScalar    *va,*vb;
2899   Vec            vtmp;
2900 
2901   PetscFunctionBegin;
2902   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2903   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2904   if (idx) {
2905     for (i=0; i<A->rmap->n; i++) {
2906       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2907     }
2908   }
2909 
2910   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2911   if (idx) {
2912     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2913   }
2914   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2915   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2916 
2917   for (i=0; i<A->rmap->n; i++) {
2918     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2919       va[i] = vb[i];
2920       if (idx) idx[i] = a->garray[idxb[i]];
2921     }
2922   }
2923 
2924   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2925   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2926   ierr = PetscFree(idxb);CHKERRQ(ierr);
2927   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2928   PetscFunctionReturn(0);
2929 }
2930 
2931 #undef __FUNCT__
2932 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2933 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2934 {
2935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2936   PetscErrorCode ierr;
2937   PetscInt       i,*idxb = 0;
2938   PetscScalar    *va,*vb;
2939   Vec            vtmp;
2940 
2941   PetscFunctionBegin;
2942   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2943   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2944   if (idx) {
2945     for (i=0; i<A->rmap->n; i++) {
2946       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2947     }
2948   }
2949 
2950   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2951   if (idx) {
2952     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2953   }
2954   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2955   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2956 
2957   for (i=0; i<A->rmap->n; i++) {
2958     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2959       va[i] = vb[i];
2960       if (idx) idx[i] = a->garray[idxb[i]];
2961     }
2962   }
2963 
2964   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2965   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2966   ierr = PetscFree(idxb);CHKERRQ(ierr);
2967   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2968   PetscFunctionReturn(0);
2969 }
2970 
2971 #undef __FUNCT__
2972 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2973 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2974 {
2975   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2976   PetscInt       n      = A->rmap->n;
2977   PetscInt       cstart = A->cmap->rstart;
2978   PetscInt       *cmap  = mat->garray;
2979   PetscInt       *diagIdx, *offdiagIdx;
2980   Vec            diagV, offdiagV;
2981   PetscScalar    *a, *diagA, *offdiagA;
2982   PetscInt       r;
2983   PetscErrorCode ierr;
2984 
2985   PetscFunctionBegin;
2986   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2987   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2988   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2989   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2990   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2991   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2992   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2993   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2994   for (r = 0; r < n; ++r) {
2995     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2996       a[r]   = diagA[r];
2997       idx[r] = cstart + diagIdx[r];
2998     } else {
2999       a[r]   = offdiagA[r];
3000       idx[r] = cmap[offdiagIdx[r]];
3001     }
3002   }
3003   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3004   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3005   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3006   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3007   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3008   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3009   PetscFunctionReturn(0);
3010 }
3011 
3012 #undef __FUNCT__
3013 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3014 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3015 {
3016   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3017   PetscInt       n      = A->rmap->n;
3018   PetscInt       cstart = A->cmap->rstart;
3019   PetscInt       *cmap  = mat->garray;
3020   PetscInt       *diagIdx, *offdiagIdx;
3021   Vec            diagV, offdiagV;
3022   PetscScalar    *a, *diagA, *offdiagA;
3023   PetscInt       r;
3024   PetscErrorCode ierr;
3025 
3026   PetscFunctionBegin;
3027   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3028   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3029   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3030   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3031   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3032   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3033   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3034   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3035   for (r = 0; r < n; ++r) {
3036     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3037       a[r]   = diagA[r];
3038       idx[r] = cstart + diagIdx[r];
3039     } else {
3040       a[r]   = offdiagA[r];
3041       idx[r] = cmap[offdiagIdx[r]];
3042     }
3043   }
3044   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3045   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3046   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3047   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3048   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3049   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3050   PetscFunctionReturn(0);
3051 }
3052 
3053 #undef __FUNCT__
3054 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3055 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3056 {
3057   PetscErrorCode ierr;
3058   Mat            *dummy;
3059 
3060   PetscFunctionBegin;
3061   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3062   *newmat = *dummy;
3063   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3064   PetscFunctionReturn(0);
3065 }
3066 
3067 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3068 
3069 #undef __FUNCT__
3070 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3071 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3072 {
3073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3074   PetscErrorCode ierr;
3075 
3076   PetscFunctionBegin;
3077   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3078   PetscFunctionReturn(0);
3079 }
3080 
3081 #undef __FUNCT__
3082 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3083 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3084 {
3085   PetscErrorCode ierr;
3086   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3087 
3088   PetscFunctionBegin;
3089   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3090   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3091   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3092   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3093   PetscFunctionReturn(0);
3094 }
3095 
3096 /* -------------------------------------------------------------------*/
3097 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3098        MatGetRow_MPIAIJ,
3099        MatRestoreRow_MPIAIJ,
3100        MatMult_MPIAIJ,
3101 /* 4*/ MatMultAdd_MPIAIJ,
3102        MatMultTranspose_MPIAIJ,
3103        MatMultTransposeAdd_MPIAIJ,
3104 #if defined(PETSC_HAVE_PBGL)
3105        MatSolve_MPIAIJ,
3106 #else
3107        0,
3108 #endif
3109        0,
3110        0,
3111 /*10*/ 0,
3112        0,
3113        0,
3114        MatSOR_MPIAIJ,
3115        MatTranspose_MPIAIJ,
3116 /*15*/ MatGetInfo_MPIAIJ,
3117        MatEqual_MPIAIJ,
3118        MatGetDiagonal_MPIAIJ,
3119        MatDiagonalScale_MPIAIJ,
3120        MatNorm_MPIAIJ,
3121 /*20*/ MatAssemblyBegin_MPIAIJ,
3122        MatAssemblyEnd_MPIAIJ,
3123        MatSetOption_MPIAIJ,
3124        MatZeroEntries_MPIAIJ,
3125 /*24*/ MatZeroRows_MPIAIJ,
3126        0,
3127 #if defined(PETSC_HAVE_PBGL)
3128        0,
3129 #else
3130        0,
3131 #endif
3132        0,
3133        0,
3134 /*29*/ MatSetUp_MPIAIJ,
3135 #if defined(PETSC_HAVE_PBGL)
3136        0,
3137 #else
3138        0,
3139 #endif
3140        0,
3141        0,
3142        0,
3143 /*34*/ MatDuplicate_MPIAIJ,
3144        0,
3145        0,
3146        0,
3147        0,
3148 /*39*/ MatAXPY_MPIAIJ,
3149        MatGetSubMatrices_MPIAIJ,
3150        MatIncreaseOverlap_MPIAIJ,
3151        MatGetValues_MPIAIJ,
3152        MatCopy_MPIAIJ,
3153 /*44*/ MatGetRowMax_MPIAIJ,
3154        MatScale_MPIAIJ,
3155        0,
3156        0,
3157        MatZeroRowsColumns_MPIAIJ,
3158 /*49*/ MatSetRandom_MPIAIJ,
3159        0,
3160        0,
3161        0,
3162        0,
3163 /*54*/ MatFDColoringCreate_MPIAIJ,
3164        0,
3165        MatSetUnfactored_MPIAIJ,
3166        MatPermute_MPIAIJ,
3167        0,
3168 /*59*/ MatGetSubMatrix_MPIAIJ,
3169        MatDestroy_MPIAIJ,
3170        MatView_MPIAIJ,
3171        0,
3172        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3173 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3174        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3175        0,
3176        0,
3177        0,
3178 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3179        MatGetRowMinAbs_MPIAIJ,
3180        0,
3181        MatSetColoring_MPIAIJ,
3182        0,
3183        MatSetValuesAdifor_MPIAIJ,
3184 /*75*/ MatFDColoringApply_AIJ,
3185        0,
3186        0,
3187        0,
3188        MatFindZeroDiagonals_MPIAIJ,
3189 /*80*/ 0,
3190        0,
3191        0,
3192 /*83*/ MatLoad_MPIAIJ,
3193        0,
3194        0,
3195        0,
3196        0,
3197        0,
3198 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3199        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3200        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3201        MatPtAP_MPIAIJ_MPIAIJ,
3202        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3203 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3204        0,
3205        0,
3206        0,
3207        0,
3208 /*99*/ 0,
3209        0,
3210        0,
3211        MatConjugate_MPIAIJ,
3212        0,
3213 /*104*/MatSetValuesRow_MPIAIJ,
3214        MatRealPart_MPIAIJ,
3215        MatImaginaryPart_MPIAIJ,
3216        0,
3217        0,
3218 /*109*/0,
3219        MatGetRedundantMatrix_MPIAIJ,
3220        MatGetRowMin_MPIAIJ,
3221        0,
3222        0,
3223 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3224        0,
3225        0,
3226        0,
3227        0,
3228 /*119*/0,
3229        0,
3230        0,
3231        0,
3232        MatGetMultiProcBlock_MPIAIJ,
3233 /*124*/MatFindNonzeroRows_MPIAIJ,
3234        MatGetColumnNorms_MPIAIJ,
3235        MatInvertBlockDiagonal_MPIAIJ,
3236        0,
3237        MatGetSubMatricesParallel_MPIAIJ,
3238 /*129*/0,
3239        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3240        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3241        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3242        0,
3243 /*134*/0,
3244        0,
3245        0,
3246        0,
3247        0
3248 };
3249 
3250 /* ----------------------------------------------------------------------------------------*/
3251 
3252 #undef __FUNCT__
3253 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3254 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3255 {
3256   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3257   PetscErrorCode ierr;
3258 
3259   PetscFunctionBegin;
3260   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3261   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3262   PetscFunctionReturn(0);
3263 }
3264 
3265 #undef __FUNCT__
3266 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3267 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3268 {
3269   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3270   PetscErrorCode ierr;
3271 
3272   PetscFunctionBegin;
3273   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3274   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3275   PetscFunctionReturn(0);
3276 }
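
/*
   Usage sketch for the two routines above, reached through the public MatStoreValues() and
   MatRetrieveValues(). The matrix must already be assembled and its nonzero pattern must not
   change between the two calls (setting MAT_NEW_NONZERO_LOCATIONS to PETSC_FALSE keeps it fixed):

     MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);
     MatStoreValues(mat);                       // stash a copy of the numerical values
     // ... modify the values of mat, e.g. shift the diagonal for a preconditioner ...
     MatRetrieveValues(mat);                    // restore the stashed values
*/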
3277 
3278 #undef __FUNCT__
3279 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3280 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3281 {
3282   Mat_MPIAIJ     *b;
3283   PetscErrorCode ierr;
3284   PetscInt       i;
3285   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3286 
3287   PetscFunctionBegin;
3288   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3289   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3290   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3291   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3292   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3293   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3294 
3295   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3296   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3297   if (d_nnz) {
3298     for (i=0; i<B->rmap->n; i++) {
3299       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3300     }
3301   }
3302   if (o_nnz) {
3303     for (i=0; i<B->rmap->n; i++) {
3304       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3305     }
3306   }
3307   b = (Mat_MPIAIJ*)B->data;
3308 
3309   if (!B->preallocated) {
3310     /* Explicitly create 2 MATSEQAIJ matrices. */
3311     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3312     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3313     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3314     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3315     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3316     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3317     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3318     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3319     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3320     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3321   }
3322 
3323   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3324   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3325   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3326   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3327   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3328   B->preallocated = PETSC_TRUE;
3329   PetscFunctionReturn(0);
3330 }
3331 
3332 #undef __FUNCT__
3333 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3334 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3335 {
3336   Mat            mat;
3337   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3338   PetscErrorCode ierr;
3339 
3340   PetscFunctionBegin;
3341   *newmat = 0;
3342   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3343   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3344   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3345   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3346   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3347   a       = (Mat_MPIAIJ*)mat->data;
3348 
3349   mat->factortype   = matin->factortype;
3350   mat->rmap->bs     = matin->rmap->bs;
3351   mat->cmap->bs     = matin->cmap->bs;
3352   mat->assembled    = PETSC_TRUE;
3353   mat->insertmode   = NOT_SET_VALUES;
3354   mat->preallocated = PETSC_TRUE;
3355 
3356   a->size         = oldmat->size;
3357   a->rank         = oldmat->rank;
3358   a->donotstash   = oldmat->donotstash;
3359   a->roworiented  = oldmat->roworiented;
3360   a->rowindices   = 0;
3361   a->rowvalues    = 0;
3362   a->getrowactive = PETSC_FALSE;
3363 
3364   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3365   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3366 
3367   if (oldmat->colmap) {
3368 #if defined(PETSC_USE_CTABLE)
3369     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3370 #else
3371     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3372     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3373     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3374 #endif
3375   } else a->colmap = 0;
3376   if (oldmat->garray) {
3377     PetscInt len;
3378     len  = oldmat->B->cmap->n;
3379     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3380     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3381     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3382   } else a->garray = 0;
3383 
3384   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3385   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3386   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3387   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3388   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3389   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3390   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3391   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3392   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3393   *newmat = mat;
3394   PetscFunctionReturn(0);
3395 }
3396 
3397 
3398 
3399 #undef __FUNCT__
3400 #define __FUNCT__ "MatLoad_MPIAIJ"
3401 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3402 {
3403   PetscScalar    *vals,*svals;
3404   MPI_Comm       comm;
3405   PetscErrorCode ierr;
3406   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3407   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3408   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3409   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3410   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3411   int            fd;
3412   PetscInt       bs = 1;
3413 
3414   PetscFunctionBegin;
3415   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3416   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3417   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3418   if (!rank) {
3419     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3420     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3421     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3422   }
3423 
3424   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3425   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3426   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3427 
3428   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3429 
3430   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3431   M    = header[1]; N = header[2];
3432   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3433   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3434   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3435 
3436   /* If global sizes are set, check if they are consistent with that given in the file */
3437   if (sizesset) {
3438     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3439   }
3440   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3441   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3442 
3443   /* determine ownership of all (block) rows */
3444   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3445   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3446   else m = newMat->rmap->n; /* Set by user */
3447 
3448   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3449   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3450 
3451   /* First process needs enough room for process with most rows */
3452   if (!rank) {
3453     mmax = rowners[1];
3454     for (i=2; i<=size; i++) {
3455       mmax = PetscMax(mmax, rowners[i]);
3456     }
3457   } else mmax = m;
3458 
3459   rowners[0] = 0;
3460   for (i=2; i<=size; i++) {
3461     rowners[i] += rowners[i-1];
3462   }
3463   rstart = rowners[rank];
3464   rend   = rowners[rank+1];
3465 
3466   /* distribute row lengths to all processors */
3467   ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3468   if (!rank) {
3469     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3470     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3471     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3472     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3473     for (j=0; j<m; j++) {
3474       procsnz[0] += ourlens[j];
3475     }
3476     for (i=1; i<size; i++) {
3477       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3478       /* calculate the number of nonzeros on each processor */
3479       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3480         procsnz[i] += rowlengths[j];
3481       }
3482       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3483     }
3484     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3485   } else {
3486     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3487   }
3488 
3489   if (!rank) {
3490     /* determine max buffer needed and allocate it */
3491     maxnz = 0;
3492     for (i=0; i<size; i++) {
3493       maxnz = PetscMax(maxnz,procsnz[i]);
3494     }
3495     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3496 
3497     /* read in my part of the matrix column indices  */
3498     nz   = procsnz[0];
3499     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3500     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3501 
3502     /* read in everyone else's part and ship it off */
3503     for (i=1; i<size; i++) {
3504       nz   = procsnz[i];
3505       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3506       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3507     }
3508     ierr = PetscFree(cols);CHKERRQ(ierr);
3509   } else {
3510     /* determine buffer space needed for message */
3511     nz = 0;
3512     for (i=0; i<m; i++) {
3513       nz += ourlens[i];
3514     }
3515     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3516 
3517     /* receive message of column indices*/
3518     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3519   }
3520 
3521   /* determine column ownership if matrix is not square */
3522   if (N != M) {
3523     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3524     else n = newMat->cmap->n;
3525     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3526     cstart = cend - n;
3527   } else {
3528     cstart = rstart;
3529     cend   = rend;
3530     n      = cend - cstart;
3531   }
3532 
3533   /* loop over local rows, determining number of off diagonal entries */
3534   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3535   jj   = 0;
3536   for (i=0; i<m; i++) {
3537     for (j=0; j<ourlens[i]; j++) {
3538       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3539       jj++;
3540     }
3541   }
3542 
3543   for (i=0; i<m; i++) {
3544     ourlens[i] -= offlens[i];
3545   }
3546   if (!sizesset) {
3547     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3548   }
3549 
3550   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3551 
3552   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3553 
3554   for (i=0; i<m; i++) {
3555     ourlens[i] += offlens[i];
3556   }
3557 
3558   if (!rank) {
3559     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3560 
3561     /* read in my part of the matrix numerical values  */
3562     nz   = procsnz[0];
3563     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3564 
3565     /* insert into matrix */
3566     jj      = rstart;
3567     smycols = mycols;
3568     svals   = vals;
3569     for (i=0; i<m; i++) {
3570       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3571       smycols += ourlens[i];
3572       svals   += ourlens[i];
3573       jj++;
3574     }
3575 
3576     /* read in other processors' parts and ship them out */
3577     for (i=1; i<size; i++) {
3578       nz   = procsnz[i];
3579       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3580       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3581     }
3582     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3583   } else {
3584     /* receive numeric values */
3585     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3586 
3587     /* receive message of values*/
3588     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3589 
3590     /* insert into matrix */
3591     jj      = rstart;
3592     smycols = mycols;
3593     svals   = vals;
3594     for (i=0; i<m; i++) {
3595       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3596       smycols += ourlens[i];
3597       svals   += ourlens[i];
3598       jj++;
3599     }
3600   }
3601   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3602   ierr = PetscFree(vals);CHKERRQ(ierr);
3603   ierr = PetscFree(mycols);CHKERRQ(ierr);
3604   ierr = PetscFree(rowners);CHKERRQ(ierr);
3605   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3606   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3607   PetscFunctionReturn(0);
3608 }
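
/*
   A minimal loading sketch (illustrative; "matrix.dat" is a hypothetical file previously
   written with MatView() on a binary viewer):

     Mat         A;
     PetscViewer viewer;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATMPIAIJ);
     MatLoad(A,viewer);
     PetscViewerDestroy(&viewer);
*/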
3609 
3610 #undef __FUNCT__
3611 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3612 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3613 {
3614   PetscErrorCode ierr;
3615   IS             iscol_local;
3616   PetscInt       csize;
3617 
3618   PetscFunctionBegin;
3619   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3620   if (call == MAT_REUSE_MATRIX) {
3621     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3622     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3623   } else {
3624     PetscInt cbs;
3625     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3626     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3627     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3628   }
3629   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3630   if (call == MAT_INITIAL_MATRIX) {
3631     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3632     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3633   }
3634   PetscFunctionReturn(0);
3635 }
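
/*
   A minimal usage sketch for the routine above, reached through the public MatGetSubMatrix()
   (the index-set sizes and offsets shown are illustrative):

     IS  isrow,iscol;
     Mat sub;
     ISCreateStride(PETSC_COMM_WORLD,nlocalrows,firstrow,1,&isrow);
     ISCreateStride(PETSC_COMM_WORLD,nlocalcols,firstcol,1,&iscol);
     MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);
     // extract again with the same index sets, reusing sub's layout and nonzero pattern
     MatGetSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);
     ISDestroy(&isrow); ISDestroy(&iscol); MatDestroy(&sub);
*/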
3636 
3637 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3638 #undef __FUNCT__
3639 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3640 /*
3641     Not great since it makes two copies of the submatrix: first a local SeqAIJ on each
3642   process, and then the end result obtained by concatenating the local matrices.
3643   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3644 
3645   Note: This requires a sequential iscol containing all of the requested column indices.
3646 */
3647 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3648 {
3649   PetscErrorCode ierr;
3650   PetscMPIInt    rank,size;
3651   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3652   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3653   PetscBool      allcolumns, colflag;
3654   Mat            M,Mreuse;
3655   MatScalar      *vwork,*aa;
3656   MPI_Comm       comm;
3657   Mat_SeqAIJ     *aij;
3658 
3659   PetscFunctionBegin;
3660   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3661   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3662   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3663 
3664   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3665   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3666   if (colflag && ncol == mat->cmap->N) {
3667     allcolumns = PETSC_TRUE;
3668   } else {
3669     allcolumns = PETSC_FALSE;
3670   }
3671   if (call ==  MAT_REUSE_MATRIX) {
3672     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3673     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3674     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3675   } else {
3676     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3677   }
3678 
3679   /*
3680       m - number of local rows
3681       n - number of columns (same on all processors)
3682       rstart - first row in new global matrix generated
3683   */
3684   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3685   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3686   if (call == MAT_INITIAL_MATRIX) {
3687     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3688     ii  = aij->i;
3689     jj  = aij->j;
3690 
3691     /*
3692         Determine the number of non-zeros in the diagonal and off-diagonal
3693         portions of the matrix in order to do correct preallocation
3694     */
3695 
3696     /* first get start and end of "diagonal" columns */
3697     if (csize == PETSC_DECIDE) {
3698       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3699       if (mglobal == n) { /* square matrix */
3700         nlocal = m;
3701       } else {
3702         nlocal = n/size + ((n % size) > rank);
3703       }
3704     } else {
3705       nlocal = csize;
3706     }
3707     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3708     rstart = rend - nlocal;
3709     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3710 
3711     /* next, compute all the lengths */
3712     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3713     olens = dlens + m;
3714     for (i=0; i<m; i++) {
3715       jend = ii[i+1] - ii[i];
3716       olen = 0;
3717       dlen = 0;
3718       for (j=0; j<jend; j++) {
3719         if (*jj < rstart || *jj >= rend) olen++;
3720         else dlen++;
3721         jj++;
3722       }
3723       olens[i] = olen;
3724       dlens[i] = dlen;
3725     }
3726     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3727     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3728     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3729     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3730     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3731     ierr = PetscFree(dlens);CHKERRQ(ierr);
3732   } else {
3733     PetscInt ml,nl;
3734 
3735     M    = *newmat;
3736     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3737     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3738     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3739     /*
3740          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3741        rather than the slower MatSetValues().
3742     */
3743     M->was_assembled = PETSC_TRUE;
3744     M->assembled     = PETSC_FALSE;
3745   }
3746   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3747   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3748   ii   = aij->i;
3749   jj   = aij->j;
3750   aa   = aij->a;
3751   for (i=0; i<m; i++) {
3752     row   = rstart + i;
3753     nz    = ii[i+1] - ii[i];
3754     cwork = jj;     jj += nz;
3755     vwork = aa;     aa += nz;
3756     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3757   }
3758 
3759   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3760   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3761   *newmat = M;
3762 
3763   /* save submatrix used in processor for next request */
3764   if (call ==  MAT_INITIAL_MATRIX) {
3765     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3766     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3767   }
3768   PetscFunctionReturn(0);
3769 }
3770 
3771 #undef __FUNCT__
3772 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3773 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3774 {
3775   PetscInt       m,cstart, cend,j,nnz,i,d;
3776   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3777   const PetscInt *JJ;
3778   PetscScalar    *values;
3779   PetscErrorCode ierr;
3780 
3781   PetscFunctionBegin;
3782   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3783 
3784   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3785   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3786   m      = B->rmap->n;
3787   cstart = B->cmap->rstart;
3788   cend   = B->cmap->rend;
3789   rstart = B->rmap->rstart;
3790 
3791   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3792 
3793 #if defined(PETSC_USE_DEBUG)
3794   for (i=0; i<m; i++) {
3795     nnz = Ii[i+1]- Ii[i];
3796     JJ  = J + Ii[i];
3797     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3798     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3799     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3800   }
3801 #endif
3802 
3803   for (i=0; i<m; i++) {
3804     nnz     = Ii[i+1]- Ii[i];
3805     JJ      = J + Ii[i];
3806     nnz_max = PetscMax(nnz_max,nnz);
3807     d       = 0;
3808     for (j=0; j<nnz; j++) {
3809       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3810     }
3811     d_nnz[i] = d;
3812     o_nnz[i] = nnz - d;
3813   }
3814   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3815   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3816 
3817   if (v) values = (PetscScalar*)v;
3818   else {
3819     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3820     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3821   }
3822 
3823   for (i=0; i<m; i++) {
3824     ii   = i + rstart;
3825     nnz  = Ii[i+1]- Ii[i];
3826     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3827   }
3828   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3829   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3830 
3831   if (!v) {
3832     ierr = PetscFree(values);CHKERRQ(ierr);
3833   }
3834   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3835   PetscFunctionReturn(0);
3836 }
3837 
3838 #undef __FUNCT__
3839 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3840 /*@
3841    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3842    (the default parallel PETSc format).
3843 
3844    Collective on MPI_Comm
3845 
3846    Input Parameters:
3847 +  B - the matrix
3848 .  i - the indices into j for the start of each local row (starts with zero)
3849 .  j - the column indices for each local row (starts with zero)
3850 -  v - optional values in the matrix
3851 
3852    Level: developer
3853 
3854    Notes:
3855        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3856      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3857      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3858 
3859        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3860 
3861        The format used for the sparse matrix input is equivalent to a
3862     row-major ordering, i.e. for the following matrix, the input data expected is
3863     as shown:
3864 
3865         1 0 0
3866         2 0 3     P0
3867        -------
3868         4 5 6     P1
3869 
3870      Process0 [P0]: rows_owned=[0,1]
3871         i =  {0,1,3}  [size = nrow+1  = 2+1]
3872         j =  {0,0,2}  [size = nz = 3]
3873         v =  {1,2,3}  [size = nz = 3]
3874 
3875      Process1 [P1]: rows_owned=[2]
3876         i =  {0,3}    [size = nrow+1  = 1+1]
3877         j =  {0,1,2}  [size = nz = 3]
3878         v =  {4,5,6}  [size = nz = 3]
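
     A minimal call sketch for process 0 with these arrays (B is assumed to have already
     been created with the local sizes shown above; process 1 passes its own arrays):
.vb
        PetscInt    i[] = {0,1,3};
        PetscInt    j[] = {0,0,2};
        PetscScalar v[] = {1,2,3};
        ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve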
3879 
3880 .keywords: matrix, aij, compressed row, sparse, parallel
3881 
3882 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3883           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3884 @*/
3885 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3886 {
3887   PetscErrorCode ierr;
3888 
3889   PetscFunctionBegin;
3890   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3891   PetscFunctionReturn(0);
3892 }
3893 
3894 #undef __FUNCT__
3895 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3896 /*@C
3897    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3898    (the default parallel PETSc format).  For good matrix assembly performance
3899    the user should preallocate the matrix storage by setting the parameters
3900    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3901    performance can be increased by more than a factor of 50.
3902 
3903    Collective on MPI_Comm
3904 
3905    Input Parameters:
3906 +  A - the matrix
3907 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3908            (same value is used for all local rows)
3909 .  d_nnz - array containing the number of nonzeros in the various rows of the
3910            DIAGONAL portion of the local submatrix (possibly different for each row)
3911            or NULL, if d_nz is used to specify the nonzero structure.
3912            The size of this array is equal to the number of local rows, i.e. 'm'.
3913            For matrices that will be factored, you must leave room for (and set)
3914            the diagonal entry even if it is zero.
3915 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3916            submatrix (same value is used for all local rows).
3917 -  o_nnz - array containing the number of nonzeros in the various rows of the
3918            OFF-DIAGONAL portion of the local submatrix (possibly different for
3919            each row) or NULL, if o_nz is used to specify the nonzero
3920            structure. The size of this array is equal to the number
3921            of local rows, i.e. 'm'.
3922 
3923    If the *_nnz parameter is given then the *_nz parameter is ignored
3924 
3925    The AIJ format (also called the Yale sparse matrix format or
3926    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3927    storage.  The stored row and column indices begin with zero.
3928    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3929 
3930    The parallel matrix is partitioned such that the first m0 rows belong to
3931    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3932    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3933 
3934    The DIAGONAL portion of the local submatrix of a processor can be defined
3935    as the submatrix obtained by extracting the part corresponding to
3936    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3937    first row that belongs to the processor, r2 is the last row belonging to
3938    this processor, and c1-c2 is the range of indices of the local part of a
3939    vector to which the matrix can be applied.  This is an m x n matrix.  In the
3940    common case of a square matrix, the row and column ranges are the same and
3941    the DIAGONAL part is also square. The remaining portion of the local
3942    submatrix, of size m x (N-n), constitutes the OFF-DIAGONAL portion.
3943 
3944    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3945 
3946    You can call MatGetInfo() to get information on how effective the preallocation was;
3947    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3948    You can also run with the option -info and look for messages with the string
3949    malloc in them to see if additional memory allocation was needed.
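
        For instance, a minimal sketch of such a check (error checking omitted):
     .vb
        MatInfo info;
        MatGetInfo(B,MAT_LOCAL,&info);
        PetscPrintf(PETSC_COMM_SELF,"mallocs %g, allocated %g, used %g, unneeded %g\n",
                    info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);
     .ve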
3950 
3951    Example usage:
3952 
3953    Consider the following 8x8 matrix with 34 non-zero values that is
3954    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
3955    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3956    as follows:
3957 
3958 .vb
3959             1  2  0  |  0  3  0  |  0  4
3960     Proc0   0  5  6  |  7  0  0  |  8  0
3961             9  0 10  | 11  0  0  | 12  0
3962     -------------------------------------
3963            13  0 14  | 15 16 17  |  0  0
3964     Proc1   0 18  0  | 19 20 21  |  0  0
3965             0  0  0  | 22 23  0  | 24  0
3966     -------------------------------------
3967     Proc2  25 26 27  |  0  0 28  | 29  0
3968            30  0  0  | 31 32 33  |  0 34
3969 .ve
3970 
3971    This can be represented as a collection of submatrices as:
3972 
3973 .vb
3974       A B C
3975       D E F
3976       G H I
3977 .ve
3978 
3979    Where the submatrices A,B,C are owned by proc0, D,E,F are
3980    owned by proc1, G,H,I are owned by proc2.
3981 
3982    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3983    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3984    The 'M','N' parameters are 8,8, and have the same values on all procs.
3985 
3986    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3987    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3988    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3989    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3990    part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
3991    matrix, and [DF] as another SeqAIJ matrix.
3992 
3993    When the d_nz, o_nz parameters are specified, d_nz storage elements are
3994    allocated for every row of the local DIAGONAL submatrix, and o_nz
3995    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3996    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3997    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3998    In this case, the values of d_nz,o_nz are:
3999 .vb
4000      proc0 : d_nz = 2, o_nz = 2
4001      proc1 : d_nz = 3, o_nz = 2
4002      proc2 : d_nz = 1, o_nz = 4
4003 .ve
4004    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4005    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4006    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4007    34 values.
4008 
4009    When the d_nnz, o_nnz parameters are specified, the storage is specified
4010    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4011    In the above case the values for d_nnz,o_nnz are:
4012 .vb
4013      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4014      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4015      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4016 .ve
4017    Here the space allocated is the sum of all the above values, i.e., 34, and
4018    hence the preallocation is perfect.
4019 
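        A rough sketch of the calling sequence for the example above (error checking omitted;
     each process passes its own arrays, here those of proc1):
     .vb
        Mat      A;
        PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};   /* proc1's rows in the example above */
        MatCreate(PETSC_COMM_WORLD,&A);
        MatSetSizes(A,3,3,8,8);
        MatSetType(A,MATMPIAIJ);
        MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
        /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ... */
     .ve
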
4020    Level: intermediate
4021 
4022 .keywords: matrix, aij, compressed row, sparse, parallel
4023 
4024 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4025           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4026 @*/
4027 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4028 {
4029   PetscErrorCode ierr;
4030 
4031   PetscFunctionBegin;
4032   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4033   PetscValidType(B,1);
4034   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4035   PetscFunctionReturn(0);
4036 }
4037 
4038 #undef __FUNCT__
4039 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4040 /*@
4041      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4042          rows in standard CSR format.
4043 
4044    Collective on MPI_Comm
4045 
4046    Input Parameters:
4047 +  comm - MPI communicator
4048 .  m - number of local rows (Cannot be PETSC_DECIDE)
4049 .  n - This value should be the same as the local size used in creating the
4050        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4051        calculated if N is given). For square matrices n is almost always m.
4052 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4053 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4054 .   i - row indices
4055 .   j - column indices
4056 -   a - matrix values
4057 
4058    Output Parameter:
4059 .   mat - the matrix
4060 
4061    Level: intermediate
4062 
4063    Notes:
4064        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4065      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4066      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4067 
4068        The i and j indices are 0 based, and the values in i are offsets into the local j array.
4069 
4070        The format used for the sparse matrix input is equivalent to a
4071     row-major ordering, i.e., for the following matrix, the input data expected is
4072     as shown:
4073 
4074         1 0 0
4075         2 0 3     P0
4076        -------
4077         4 5 6     P1
4078 
4079      Process0 [P0]: rows_owned=[0,1]
4080         i =  {0,1,3}  [size = nrow+1  = 2+1]
4081         j =  {0,0,2}  [size = nz = 3]
4082         v =  {1,2,3}  [size = nz = 3]
4083 
4084      Process1 [P1]: rows_owned=[2]
4085         i =  {0,3}    [size = nrow+1  = 1+1]
4086         j =  {0,1,2}  [size = nz = 3]
4087         v =  {4,5,6}  [size = nz = 3]
4088 
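        A minimal calling sketch for the example above (error checking omitted; m, i, j, and a
     are illustrative names for each process's local row count and per-process arrays, i.e.
     m is 2 on P0 and 1 on P1):
     .vb
        Mat mat;
        MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&mat);
     .ve
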
4089 .keywords: matrix, aij, compressed row, sparse, parallel
4090 
4091 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4092           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4093 @*/
4094 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4095 {
4096   PetscErrorCode ierr;
4097 
4098   PetscFunctionBegin;
4099   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4100   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4101   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4102   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4103   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4104   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4105   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4106   PetscFunctionReturn(0);
4107 }
4108 
4109 #undef __FUNCT__
4110 #define __FUNCT__ "MatCreateAIJ"
4111 /*@C
4112    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4113    (the default parallel PETSc format).  For good matrix assembly performance
4114    the user should preallocate the matrix storage by setting the parameters
4115    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4116    performance can be increased by more than a factor of 50.
4117 
4118    Collective on MPI_Comm
4119 
4120    Input Parameters:
4121 +  comm - MPI communicator
4122 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4123            This value should be the same as the local size used in creating the
4124            y vector for the matrix-vector product y = Ax.
4125 .  n - This value should be the same as the local size used in creating the
4126        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4127        calculated if N is given). For square matrices n is almost always m.
4128 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4129 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4130 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4131            (same value is used for all local rows)
4132 .  d_nnz - array containing the number of nonzeros in the various rows of the
4133            DIAGONAL portion of the local submatrix (possibly different for each row)
4134            or NULL, if d_nz is used to specify the nonzero structure.
4135            The size of this array is equal to the number of local rows, i.e 'm'.
4136 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4137            submatrix (same value is used for all local rows).
4138 -  o_nnz - array containing the number of nonzeros in the various rows of the
4139            OFF-DIAGONAL portion of the local submatrix (possibly different for
4140            each row) or NULL, if o_nz is used to specify the nonzero
4141            structure. The size of this array is equal to the number
4142            of local rows, i.e 'm'.
4143 
4144    Output Parameter:
4145 .  A - the matrix
4146 
4147    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4148    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4149    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4150 
4151    Notes:
4152    If the *_nnz parameter is given then the *_nz parameter is ignored
4153 
4154    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4155    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4156    storage requirements for this matrix.
4157 
4158    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4159    processor then it must be used on all processors that share the object for
4160    that argument.
4161 
4162    The user MUST specify either the local or global matrix dimensions
4163    (possibly both).
4164 
4165    The parallel matrix is partitioned across processors such that the
4166    first m0 rows belong to process 0, the next m1 rows belong to
4167    process 1, the next m2 rows belong to process 2, etc., where
4168    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4169    values corresponding to an [m x N] submatrix.
4170 
4171    The columns are logically partitioned with the n0 columns belonging
4172    to the 0th partition, the next n1 columns belonging to the next
4173    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4174 
4175    The DIAGONAL portion of the local submatrix on any given processor
4176    is the submatrix corresponding to the rows and columns m,n
4177    owned by that processor, i.e., the diagonal matrix on
4178    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4179    etc. The remaining portion of the local submatrix [m x (N-n)]
4180    constitutes the OFF-DIAGONAL portion. The example below
4181    illustrates this concept.
4182 
4183    For a square global matrix we define each processor's diagonal portion
4184    to be its local rows and the corresponding columns (a square submatrix);
4185    each processor's off-diagonal portion encompasses the remainder of the
4186    local matrix (a rectangular submatrix).
4187 
4188    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4189 
4190    When calling this routine with a single process communicator, a matrix of
4191    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4192    type of communicator, use the construction mechanism:
4193      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4194 
4195    By default, this format uses inodes (identical nodes) when possible.
4196    We search for consecutive rows with the same nonzero structure, thereby
4197    reusing matrix information to achieve increased efficiency.
4198 
4199    Options Database Keys:
4200 +  -mat_no_inode  - Do not use inodes
4201 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4202 -  -mat_aij_oneindex - Internally use indexing starting at 1
4203         rather than 0.  Note that when calling MatSetValues(),
4204         the user still MUST index entries starting at 0!
4205 
4206 
4207    Example usage:
4208 
4209    Consider the following 8x8 matrix with 34 non-zero values that is
4210    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4211    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4212    as follows:
4213 
4214 .vb
4215             1  2  0  |  0  3  0  |  0  4
4216     Proc0   0  5  6  |  7  0  0  |  8  0
4217             9  0 10  | 11  0  0  | 12  0
4218     -------------------------------------
4219            13  0 14  | 15 16 17  |  0  0
4220     Proc1   0 18  0  | 19 20 21  |  0  0
4221             0  0  0  | 22 23  0  | 24  0
4222     -------------------------------------
4223     Proc2  25 26 27  |  0  0 28  | 29  0
4224            30  0  0  | 31 32 33  |  0 34
4225 .ve
4226 
4227    This can be represented as a collection of submatrices as:
4228 
4229 .vb
4230       A B C
4231       D E F
4232       G H I
4233 .ve
4234 
4235    Where the submatrices A,B,C are owned by proc0, D,E,F are
4236    owned by proc1, G,H,I are owned by proc2.
4237 
4238    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4239    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4240    The 'M','N' parameters are 8,8, and have the same values on all procs.
4241 
4242    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4243    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4244    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4245    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4246    part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
4247    matrix, and [DF] as another SeqAIJ matrix.
4248 
4249    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4250    allocated for every row of the local DIAGONAL submatrix, and o_nz
4251    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4252    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4253    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4254    In this case, the values of d_nz,o_nz are:
4255 .vb
4256      proc0 : d_nz = 2, o_nz = 2
4257      proc1 : d_nz = 3, o_nz = 2
4258      proc2 : d_nz = 1, o_nz = 4
4259 .ve
4260    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4261    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4262    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4263    34 values.
4264 
4265    When the d_nnz, o_nnz parameters are specified, the storage is specified
4266    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4267    In the above case the values for d_nnz,o_nnz are:
4268 .vb
4269      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4270      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4271      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4272 .ve
4273    Here the space allocated is the sum of all the above values, i.e., 34, and
4274    hence the preallocation is perfect.
4275 
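        A rough sketch for the example above (error checking omitted; each process passes its
     own m,n,d_nnz,o_nnz, here those of proc0):
     .vb
        Mat      A;
        PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* proc0's rows in the example above */
        MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
        /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use, MatDestroy(&A) ... */
     .ve
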
4276    Level: intermediate
4277 
4278 .keywords: matrix, aij, compressed row, sparse, parallel
4279 
4280 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4281           MPIAIJ, MatCreateMPIAIJWithArrays()
4282 @*/
4283 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4284 {
4285   PetscErrorCode ierr;
4286   PetscMPIInt    size;
4287 
4288   PetscFunctionBegin;
4289   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4290   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4291   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4292   if (size > 1) {
4293     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4294     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4295   } else {
4296     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4297     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4298   }
4299   PetscFunctionReturn(0);
4300 }
4301 
4302 #undef __FUNCT__
4303 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4304 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4305 {
4306   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4307 
4308   PetscFunctionBegin;
4309   *Ad     = a->A;
4310   *Ao     = a->B;
4311   *colmap = a->garray;
4312   PetscFunctionReturn(0);
4313 }
4314 
4315 #undef __FUNCT__
4316 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4317 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4318 {
4319   PetscErrorCode ierr;
4320   PetscInt       i;
4321   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4322 
4323   PetscFunctionBegin;
4324   if (coloring->ctype == IS_COLORING_GLOBAL) {
4325     ISColoringValue *allcolors,*colors;
4326     ISColoring      ocoloring;
4327 
4328     /* set coloring for diagonal portion */
4329     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4330 
4331     /* set coloring for off-diagonal portion */
4332     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4333     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4334     for (i=0; i<a->B->cmap->n; i++) {
4335       colors[i] = allcolors[a->garray[i]];
4336     }
4337     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4338     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4339     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4340     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4341   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4342     ISColoringValue *colors;
4343     PetscInt        *larray;
4344     ISColoring      ocoloring;
4345 
4346     /* set coloring for diagonal portion */
4347     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4348     for (i=0; i<a->A->cmap->n; i++) {
4349       larray[i] = i + A->cmap->rstart;
4350     }
4351     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4352     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4353     for (i=0; i<a->A->cmap->n; i++) {
4354       colors[i] = coloring->colors[larray[i]];
4355     }
4356     ierr = PetscFree(larray);CHKERRQ(ierr);
4357     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4358     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4359     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4360 
4361     /* set coloring for off-diagonal portion */
4362     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4363     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4364     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4365     for (i=0; i<a->B->cmap->n; i++) {
4366       colors[i] = coloring->colors[larray[i]];
4367     }
4368     ierr = PetscFree(larray);CHKERRQ(ierr);
4369     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4370     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4371     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4372   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4373   PetscFunctionReturn(0);
4374 }
4375 
4376 #undef __FUNCT__
4377 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4378 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4379 {
4380   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4381   PetscErrorCode ierr;
4382 
4383   PetscFunctionBegin;
4384   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4385   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4386   PetscFunctionReturn(0);
4387 }
4388 
4389 #undef __FUNCT__
4390 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4391 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4392 {
4393   PetscErrorCode ierr;
4394   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4395   PetscInt       *indx;
4396 
4397   PetscFunctionBegin;
4398   /* This routine will ONLY return MPIAIJ type matrix */
4399   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4400   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4401   if (n == PETSC_DECIDE) {
4402     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4403   }
4404   /* Check sum(n) = N */
4405   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4406   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4407 
4408   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4409   rstart -= m;
4410 
4411   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4412   for (i=0; i<m; i++) {
4413     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4414     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4415     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4416   }
4417 
4418   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4419   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4420   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4421   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4422   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4423   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4424   PetscFunctionReturn(0);
4425 }
4426 
4427 #undef __FUNCT__
4428 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4429 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4430 {
4431   PetscErrorCode ierr;
4432   PetscInt       m,N,i,rstart,nnz,Ii;
4433   PetscInt       *indx;
4434   PetscScalar    *values;
4435 
4436   PetscFunctionBegin;
4437   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4438   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4439   for (i=0; i<m; i++) {
4440     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4441     Ii   = i + rstart;
4442     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4443     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4444   }
4445   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4446   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4447   PetscFunctionReturn(0);
4448 }
4449 
4450 #undef __FUNCT__
4451 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4452 /*@
4453       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4454                  matrices from each processor
4455 
4456     Collective on MPI_Comm
4457 
4458    Input Parameters:
4459 +    comm - the communicator the parallel matrix will live on
4460 .    inmat - the input sequential matrix on each processor
4461 .    n - number of local columns (or PETSC_DECIDE)
4462 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4463 
4464    Output Parameter:
4465 .    outmat - the parallel matrix generated
4466 
4467     Level: advanced
4468 
4469    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4470 
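        A minimal usage sketch (error checking omitted; seqmat is assumed to be an existing
     SeqAIJ matrix on each process, all with the same number of columns):
     .vb
        Mat C;
        MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     .ve
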
4471 @*/
4472 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4473 {
4474   PetscErrorCode ierr;
4475 
4476   PetscFunctionBegin;
4477   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4478   if (scall == MAT_INITIAL_MATRIX) {
4479     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4480   }
4481   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4482   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4483   PetscFunctionReturn(0);
4484 }
4485 
4486 #undef __FUNCT__
4487 #define __FUNCT__ "MatFileSplit"
4488 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4489 {
4490   PetscErrorCode    ierr;
4491   PetscMPIInt       rank;
4492   PetscInt          m,N,i,rstart,nnz;
4493   size_t            len;
4494   const PetscInt    *indx;
4495   PetscViewer       out;
4496   char              *name;
4497   Mat               B;
4498   const PetscScalar *values;
4499 
4500   PetscFunctionBegin;
4501   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4502   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4503   /* Should this be the type of the diagonal block of A? */
4504   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4505   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4506   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4507   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4508   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4509   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4510   for (i=0; i<m; i++) {
4511     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4512     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4513     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4514   }
4515   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4516   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4517 
4518   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4519   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4520   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4521   sprintf(name,"%s.%d",outfile,rank);
4522   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4523   ierr = PetscFree(name);CHKERRQ(ierr);
4524   ierr = MatView(B,out);CHKERRQ(ierr);
4525   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4526   ierr = MatDestroy(&B);CHKERRQ(ierr);
4527   PetscFunctionReturn(0);
4528 }
4529 
4530 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4531 #undef __FUNCT__
4532 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4533 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4534 {
4535   PetscErrorCode      ierr;
4536   Mat_Merge_SeqsToMPI *merge;
4537   PetscContainer      container;
4538 
4539   PetscFunctionBegin;
4540   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4541   if (container) {
4542     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4543     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4544     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4545     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4546     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4547     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4548     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4549     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4550     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4551     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4552     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4553     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4554     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4555     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4556     ierr = PetscFree(merge);CHKERRQ(ierr);
4557     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4558   }
4559   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4560   PetscFunctionReturn(0);
4561 }
4562 
4563 #include <../src/mat/utils/freespace.h>
4564 #include <petscbt.h>
4565 
4566 #undef __FUNCT__
4567 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4568 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4569 {
4570   PetscErrorCode      ierr;
4571   MPI_Comm            comm;
4572   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4573   PetscMPIInt         size,rank,taga,*len_s;
4574   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4575   PetscInt            proc,m;
4576   PetscInt            **buf_ri,**buf_rj;
4577   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4578   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4579   MPI_Request         *s_waits,*r_waits;
4580   MPI_Status          *status;
4581   MatScalar           *aa=a->a;
4582   MatScalar           **abuf_r,*ba_i;
4583   Mat_Merge_SeqsToMPI *merge;
4584   PetscContainer      container;
4585 
4586   PetscFunctionBegin;
4587   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4588   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4589 
4590   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4591   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4592 
4593   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4594   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4595 
4596   bi     = merge->bi;
4597   bj     = merge->bj;
4598   buf_ri = merge->buf_ri;
4599   buf_rj = merge->buf_rj;
4600 
4601   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4602   owners = merge->rowmap->range;
4603   len_s  = merge->len_s;
4604 
4605   /* send and recv matrix values */
4606   /*-----------------------------*/
4607   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4608   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4609 
4610   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4611   for (proc=0,k=0; proc<size; proc++) {
4612     if (!len_s[proc]) continue;
4613     i    = owners[proc];
4614     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4615     k++;
4616   }
4617 
4618   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4619   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4620   ierr = PetscFree(status);CHKERRQ(ierr);
4621 
4622   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4623   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4624 
4625   /* insert mat values of mpimat */
4626   /*----------------------------*/
4627   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4628   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4629 
4630   for (k=0; k<merge->nrecv; k++) {
4631     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4632     nrows       = *(buf_ri_k[k]);
4633     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4634     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4635   }
4636 
4637   /* set values of ba */
4638   m = merge->rowmap->n;
4639   for (i=0; i<m; i++) {
4640     arow = owners[rank] + i;
4641     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4642     bnzi = bi[i+1] - bi[i];
4643     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4644 
4645     /* add local non-zero vals of this proc's seqmat into ba */
4646     anzi   = ai[arow+1] - ai[arow];
4647     aj     = a->j + ai[arow];
4648     aa     = a->a + ai[arow];
4649     nextaj = 0;
4650     for (j=0; nextaj<anzi; j++) {
4651       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4652         ba_i[j] += aa[nextaj++];
4653       }
4654     }
4655 
4656     /* add received vals into ba */
4657     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4658       /* i-th row */
4659       if (i == *nextrow[k]) {
4660         anzi   = *(nextai[k]+1) - *nextai[k];
4661         aj     = buf_rj[k] + *(nextai[k]);
4662         aa     = abuf_r[k] + *(nextai[k]);
4663         nextaj = 0;
4664         for (j=0; nextaj<anzi; j++) {
4665           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4666             ba_i[j] += aa[nextaj++];
4667           }
4668         }
4669         nextrow[k]++; nextai[k]++;
4670       }
4671     }
4672     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4673   }
4674   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4675   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4676 
4677   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4678   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4679   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4680   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4681   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4682   PetscFunctionReturn(0);
4683 }
4684 
4685 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4686 
4687 #undef __FUNCT__
4688 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4689 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4690 {
4691   PetscErrorCode      ierr;
4692   Mat                 B_mpi;
4693   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4694   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4695   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4696   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4697   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4698   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4699   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4700   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4701   MPI_Status          *status;
4702   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4703   PetscBT             lnkbt;
4704   Mat_Merge_SeqsToMPI *merge;
4705   PetscContainer      container;
4706 
4707   PetscFunctionBegin;
4708   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4709 
4710   /* make sure it is a PETSc comm */
4711   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4712   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4713   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4714 
4715   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4716   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4717 
4718   /* determine row ownership */
4719   /*---------------------------------------------------------*/
4720   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4721   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4722   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4723   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4724   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4725   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4726   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4727 
4728   m      = merge->rowmap->n;
4729   owners = merge->rowmap->range;
4730 
4731   /* determine the number of messages to send, their lengths */
4732   /*---------------------------------------------------------*/
4733   len_s = merge->len_s;
4734 
4735   len          = 0; /* length of buf_si[] */
4736   merge->nsend = 0;
4737   for (proc=0; proc<size; proc++) {
4738     len_si[proc] = 0;
4739     if (proc == rank) {
4740       len_s[proc] = 0;
4741     } else {
4742       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4743       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4744     }
4745     if (len_s[proc]) {
4746       merge->nsend++;
4747       nrows = 0;
4748       for (i=owners[proc]; i<owners[proc+1]; i++) {
4749         if (ai[i+1] > ai[i]) nrows++;
4750       }
4751       len_si[proc] = 2*(nrows+1);
4752       len         += len_si[proc];
4753     }
4754   }
4755 
4756   /* determine the number and length of messages to receive for ij-structure */
4757   /*-------------------------------------------------------------------------*/
4758   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4759   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4760 
4761   /* post the Irecv of j-structure */
4762   /*-------------------------------*/
4763   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4764   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4765 
4766   /* post the Isend of j-structure */
4767   /*--------------------------------*/
4768   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4769 
4770   for (proc=0, k=0; proc<size; proc++) {
4771     if (!len_s[proc]) continue;
4772     i    = owners[proc];
4773     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4774     k++;
4775   }
4776 
4777   /* receives and sends of j-structure are complete */
4778   /*------------------------------------------------*/
4779   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4780   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4781 
4782   /* send and recv i-structure */
4783   /*---------------------------*/
4784   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4785   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4786 
4787   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4788   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4789   for (proc=0,k=0; proc<size; proc++) {
4790     if (!len_s[proc]) continue;
4791     /* form outgoing message for i-structure:
4792          buf_si[0]:                 nrows to be sent
4793                [1:nrows]:           row index (global)
4794                [nrows+1:2*nrows+1]: i-structure index
4795     */
4796     /*-------------------------------------------*/
4797     nrows       = len_si[proc]/2 - 1;
4798     buf_si_i    = buf_si + nrows+1;
4799     buf_si[0]   = nrows;
4800     buf_si_i[0] = 0;
4801     nrows       = 0;
4802     for (i=owners[proc]; i<owners[proc+1]; i++) {
4803       anzi = ai[i+1] - ai[i];
4804       if (anzi) {
4805         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4806         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4807         nrows++;
4808       }
4809     }
4810     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4811     k++;
4812     buf_si += len_si[proc];
4813   }
4814 
4815   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4816   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4817 
4818   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4819   for (i=0; i<merge->nrecv; i++) {
4820     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4821   }
4822 
4823   ierr = PetscFree(len_si);CHKERRQ(ierr);
4824   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4825   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4826   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4827   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4828   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4829   ierr = PetscFree(status);CHKERRQ(ierr);
4830 
4831   /* compute a local seq matrix in each processor */
4832   /*----------------------------------------------*/
4833   /* allocate bi array and free space for accumulating nonzero column info */
4834   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4835   bi[0] = 0;
4836 
4837   /* create and initialize a linked list */
4838   nlnk = N+1;
4839   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4840 
4841   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4842   len  = ai[owners[rank+1]] - ai[owners[rank]];
4843   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4844 
4845   current_space = free_space;
4846 
4847   /* determine symbolic info for each local row */
4848   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4849 
4850   for (k=0; k<merge->nrecv; k++) {
4851     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4852     nrows       = *buf_ri_k[k];
4853     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4854     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4855   }
4856 
4857   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4858   len  = 0;
4859   for (i=0; i<m; i++) {
4860     bnzi = 0;
4861     /* add local non-zero cols of this proc's seqmat into lnk */
4862     arow  = owners[rank] + i;
4863     anzi  = ai[arow+1] - ai[arow];
4864     aj    = a->j + ai[arow];
4865     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4866     bnzi += nlnk;
4867     /* add received col data into lnk */
4868     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4869       if (i == *nextrow[k]) { /* i-th row */
4870         anzi  = *(nextai[k]+1) - *nextai[k];
4871         aj    = buf_rj[k] + *nextai[k];
4872         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4873         bnzi += nlnk;
4874         nextrow[k]++; nextai[k]++;
4875       }
4876     }
4877     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4878 
4879     /* if free space is not available, make more free space */
4880     if (current_space->local_remaining<bnzi) {
4881       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4882       nspacedouble++;
4883     }
4884     /* copy data into free space, then initialize lnk */
4885     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4886     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4887 
4888     current_space->array           += bnzi;
4889     current_space->local_used      += bnzi;
4890     current_space->local_remaining -= bnzi;
4891 
4892     bi[i+1] = bi[i] + bnzi;
4893   }
4894 
4895   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4896 
4897   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4898   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4899   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4900 
4901   /* create symbolic parallel matrix B_mpi */
4902   /*---------------------------------------*/
4903   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4904   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4905   if (n==PETSC_DECIDE) {
4906     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4907   } else {
4908     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4909   }
4910   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4911   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4912   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4913   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4914   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4915 
4916   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4917   B_mpi->assembled    = PETSC_FALSE;
4918   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4919   merge->bi           = bi;
4920   merge->bj           = bj;
4921   merge->buf_ri       = buf_ri;
4922   merge->buf_rj       = buf_rj;
4923   merge->coi          = NULL;
4924   merge->coj          = NULL;
4925   merge->owners_co    = NULL;
4926 
4927   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4928 
4929   /* attach the supporting struct to B_mpi for reuse */
4930   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4931   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4932   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4933   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4934   *mpimat = B_mpi;
4935 
4936   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4937   PetscFunctionReturn(0);
4938 }
4939 
4940 #undef __FUNCT__
4941 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4942 /*@C
4943       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4944                  matrices from each processor
4945 
4946     Collective on MPI_Comm
4947 
4948    Input Parameters:
4949 +    comm - the communicator the parallel matrix will live on
4950 .    seqmat - the input sequential matrix on each processor
4951 .    m - number of local rows (or PETSC_DECIDE)
4952 .    n - number of local columns (or PETSC_DECIDE)
4953 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4954 
4955    Output Parameter:
4956 .    mpimat - the parallel matrix generated
4957 
4958     Level: advanced
4959 
4960    Notes:
4961      The dimensions of the sequential matrix in each processor MUST be the same.
4962      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4963      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
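
        A minimal usage sketch (error checking omitted; seqmat is assumed to be an existing
     SeqAIJ matrix with the full global dimensions on each process):
     .vb
        Mat mpimat;
        MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
        /* after updating the values in seqmat, the symbolic structure can be reused */
        MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
     .ve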
4964 @*/
4965 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4966 {
4967   PetscErrorCode ierr;
4968   PetscMPIInt    size;
4969 
4970   PetscFunctionBegin;
4971   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4972   if (size == 1) {
4973     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4974     if (scall == MAT_INITIAL_MATRIX) {
4975       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4976     } else {
4977       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4978     }
4979     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4980     PetscFunctionReturn(0);
4981   }
4982   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4983   if (scall == MAT_INITIAL_MATRIX) {
4984     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4985   }
4986   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4987   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4988   PetscFunctionReturn(0);
4989 }
4990 
4991 #undef __FUNCT__
4992 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4993 /*@
4994      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4995           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4996           with MatGetSize().
4997 
4998     Not Collective
4999 
5000    Input Parameters:
5001 +    A - the matrix
5002 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5003 
5004    Output Parameter:
5005 .    A_loc - the local sequential matrix generated
5006 
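        A minimal usage sketch (error checking omitted; A is assumed to be an assembled MPIAIJ matrix):
     .vb
        Mat A_loc;
        MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
        /* ... after the values of A change, refresh the local copy ... */
        MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
        MatDestroy(&A_loc);
     .ve
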
5007     Level: developer
5008 
5009 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5010 
5011 @*/
5012 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5013 {
5014   PetscErrorCode ierr;
5015   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5016   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5017   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5018   MatScalar      *aa=a->a,*ba=b->a,*cam;
5019   PetscScalar    *ca;
5020   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5021   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5022   PetscBool      match;
5023 
5024   PetscFunctionBegin;
5025   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5026   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5027   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5028   if (scall == MAT_INITIAL_MATRIX) {
5029     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5030     ci[0] = 0;
5031     for (i=0; i<am; i++) {
5032       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5033     }
5034     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5035     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5036     k    = 0;
5037     for (i=0; i<am; i++) {
5038       ncols_o = bi[i+1] - bi[i];
5039       ncols_d = ai[i+1] - ai[i];
5040       /* off-diagonal portion of A */
5041       for (jo=0; jo<ncols_o; jo++) {
5042         col = cmap[*bj];
5043         if (col >= cstart) break;
5044         cj[k]   = col; bj++;
5045         ca[k++] = *ba++;
5046       }
5047       /* diagonal portion of A */
5048       for (j=0; j<ncols_d; j++) {
5049         cj[k]   = cstart + *aj++;
5050         ca[k++] = *aa++;
5051       }
5052       /* off-diagonal portion of A */
5053       for (j=jo; j<ncols_o; j++) {
5054         cj[k]   = cmap[*bj++];
5055         ca[k++] = *ba++;
5056       }
5057     }
5058     /* put together the new matrix */
5059     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5060     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5061     /* Since these are PETSc arrays, change flags to free them as necessary. */
5062     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5063     mat->free_a  = PETSC_TRUE;
5064     mat->free_ij = PETSC_TRUE;
5065     mat->nonew   = 0;
5066   } else if (scall == MAT_REUSE_MATRIX) {
5067     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5068     ci = mat->i; cj = mat->j; cam = mat->a;
5069     for (i=0; i<am; i++) {
5070       /* off-diagonal portion of A */
5071       ncols_o = bi[i+1] - bi[i];
5072       for (jo=0; jo<ncols_o; jo++) {
5073         col = cmap[*bj];
5074         if (col >= cstart) break;
5075         *cam++ = *ba++; bj++;
5076       }
5077       /* diagonal portion of A */
5078       ncols_d = ai[i+1] - ai[i];
5079       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5080       /* off-diagonal portion of A */
5081       for (j=jo; j<ncols_o; j++) {
5082         *cam++ = *ba++; bj++;
5083       }
5084     }
5085   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5086   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5087   PetscFunctionReturn(0);
5088 }
5089 
5090 #undef __FUNCT__
5091 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5092 /*@C
5093      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5094 
5095     Not Collective
5096 
5097    Input Parameters:
5098 +    A - the matrix
5099 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5100 -    row, col - index sets of rows and columns to extract (or NULL)
5101 
5102    Output Parameter:
5103 .    A_loc - the local sequential matrix generated
5104 
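        A minimal usage sketch (error checking omitted; A is assumed to be an assembled MPIAIJ matrix;
     passing NULL for row and col selects all local rows and all nonzero columns):
     .vb
        Mat A_loc;
        MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
     .ve
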
5105     Level: developer
5106 
5107 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5108 
5109 @*/
5110 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5111 {
5112   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5113   PetscErrorCode ierr;
5114   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5115   IS             isrowa,iscola;
5116   Mat            *aloc;
5117   PetscBool      match;
5118 
5119   PetscFunctionBegin;
5120   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5121   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5122   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5123   if (!row) {
5124     start = A->rmap->rstart; end = A->rmap->rend;
5125     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5126   } else {
5127     isrowa = *row;
5128   }
5129   if (!col) {
5130     start = A->cmap->rstart;
5131     cmap  = a->garray;
5132     nzA   = a->A->cmap->n;
5133     nzB   = a->B->cmap->n;
5134     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5135     ncols = 0;
5136     for (i=0; i<nzB; i++) {
5137       if (cmap[i] < start) idx[ncols++] = cmap[i];
5138       else break;
5139     }
5140     imark = i;
5141     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5142     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5143     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5144   } else {
5145     iscola = *col;
5146   }
5147   if (scall != MAT_INITIAL_MATRIX) {
5148     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5149     aloc[0] = *A_loc;
5150   }
5151   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5152   *A_loc = aloc[0];
5153   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5154   if (!row) {
5155     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5156   }
5157   if (!col) {
5158     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5159   }
5160   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5161   PetscFunctionReturn(0);
5162 }
5163 
5164 #undef __FUNCT__
5165 #define __FUNCT__ "MatGetBrowsOfAcols"
5166 /*@C
5167     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5168 
5169     Collective on Mat
5170 
5171    Input Parameters:
5172 +    A,B - the matrices in mpiaij format
5173 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5174 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5175 
5176    Output Parameter:
5177 +    rowb, colb - index sets of rows and columns of B to extract
5178 -    B_seq - the sequential matrix generated
5179 
5180     Level: developer
5181 
5182 @*/
5183 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5184 {
5185   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5186   PetscErrorCode ierr;
5187   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5188   IS             isrowb,iscolb;
5189   Mat            *bseq=NULL;
5190 
5191   PetscFunctionBegin;
5192   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5193     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5194   }
5195   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5196 
5197   if (scall == MAT_INITIAL_MATRIX) {
5198     start = A->cmap->rstart;
5199     cmap  = a->garray;
5200     nzA   = a->A->cmap->n;
5201     nzB   = a->B->cmap->n;
5202     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5203     ncols = 0;
5204     for (i=0; i<nzB; i++) {  /* row < local row index */
5205       if (cmap[i] < start) idx[ncols++] = cmap[i];
5206       else break;
5207     }
5208     imark = i;
5209     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5210     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows of B above the local row range */
5211     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5212     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5213   } else {
5214     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5215     isrowb  = *rowb; iscolb = *colb;
5216     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5217     bseq[0] = *B_seq;
5218   }
5219   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5220   *B_seq = bseq[0];
5221   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5222   if (!rowb) {
5223     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5224   } else {
5225     *rowb = isrowb;
5226   }
5227   if (!colb) {
5228     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5229   } else {
5230     *colb = iscolb;
5231   }
5232   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5233   PetscFunctionReturn(0);
5234 }
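
/*
   A minimal usage sketch for MatGetBrowsOfAcols() (illustrative only; rowb, colb and Bseq are
   hypothetical caller-side names).  The index sets returned by the MAT_INITIAL_MATRIX call are
   passed back unchanged for MAT_REUSE_MATRIX and destroyed by the caller at the end:

      IS  rowb = NULL,colb = NULL;
      Mat Bseq = NULL;

      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
      ... update the numerical values of A and B, keeping their nonzero patterns ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);

      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
*/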
5235 
5236 #undef __FUNCT__
5237 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5238 /*
5239     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5240     of the OFF-DIAGONAL portion of local A
5241 
5242     Collective on Mat
5243 
5244    Input Parameters:
5245 +    A,B - the matrices in mpiaij format
5246 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5247 
5248    Output Parameters:
5249 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5250 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5251 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5252 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5253 
5254     Level: developer
5255 
5256 */
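/*
   A minimal call-pattern sketch for MatGetBrowsOfAoCols_MPIAIJ() (illustrative only; the
   caller-side variable names below are hypothetical).  The startsj_s, startsj_r and bufa
   buffers returned for MAT_INITIAL_MATRIX must be passed back unchanged for MAT_REUSE_MATRIX
   (same nonzero pattern of B); they are obtained with PetscMalloc2()/PetscMalloc() below and
   are therefore released with PetscFree2()/PetscFree() by the caller when no longer needed:

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa      = NULL;
      Mat       B_oth      = NULL;

      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ... update the numerical values of B, keeping its nonzero pattern ...
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);

      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/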
5257 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5258 {
5259   VecScatter_MPI_General *gen_to,*gen_from;
5260   PetscErrorCode         ierr;
5261   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5262   Mat_SeqAIJ             *b_oth;
5263   VecScatter             ctx =a->Mvctx;
5264   MPI_Comm               comm;
5265   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5266   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5267   PetscScalar            *rvalues,*svalues;
5268   MatScalar              *b_otha,*bufa,*bufA;
5269   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5270   MPI_Request            *rwaits = NULL,*swaits = NULL;
5271   MPI_Status             *sstatus,rstatus;
5272   PetscMPIInt            jj;
5273   PetscInt               *cols,sbs,rbs;
5274   PetscScalar            *vals;
5275 
5276   PetscFunctionBegin;
5277   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5278   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5279     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5280   }
5281   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5282   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5283 
5284   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5285   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5286   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
5287   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
5288   nrecvs   = gen_from->n;
5289   nsends   = gen_to->n;
5290 
5291   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5292   srow    = gen_to->indices;    /* local row index to be sent */
5293   sstarts = gen_to->starts;
5294   sprocs  = gen_to->procs;
5295   sstatus = gen_to->sstatus;
5296   sbs     = gen_to->bs;
5297   rstarts = gen_from->starts;
5298   rprocs  = gen_from->procs;
5299   rbs     = gen_from->bs;
5300 
5301   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5302   if (scall == MAT_INITIAL_MATRIX) {
5303     /* i-array */
5304     /*---------*/
5305     /*  post receives */
5306     for (i=0; i<nrecvs; i++) {
5307       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5308       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5309       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5310     }
5311 
5312     /* pack the outgoing message */
5313     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5314 
5315     sstartsj[0] = 0;
5316     rstartsj[0] = 0;
5317     len         = 0; /* total length of j or a array to be sent */
5318     k           = 0;
5319     for (i=0; i<nsends; i++) {
5320       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5321       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5322       for (j=0; j<nrows; j++) {
5323         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5324         for (l=0; l<sbs; l++) {
5325           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5326 
5327           rowlen[j*sbs+l] = ncols;
5328 
5329           len += ncols;
5330           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5331         }
5332         k++;
5333       }
5334       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5335 
5336       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5337     }
5338     /* recvs and sends of i-array are completed */
5339     i = nrecvs;
5340     while (i--) {
5341       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5342     }
5343     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5344 
5345     /* allocate buffers for sending j and a arrays */
5346     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5347     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5348 
5349     /* create i-array of B_oth */
5350     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5351 
5352     b_othi[0] = 0;
5353     len       = 0; /* total length of j or a array to be received */
5354     k         = 0;
5355     for (i=0; i<nrecvs; i++) {
5356       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5357       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5358       for (j=0; j<nrows; j++) {
5359         b_othi[k+1] = b_othi[k] + rowlen[j];
5360         len        += rowlen[j]; k++;
5361       }
5362       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5363     }
5364 
5365     /* allocate space for j and a arrays of B_oth */
5366     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5367     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5368 
5369     /* j-array */
5370     /*---------*/
5371     /*  post receives of j-array */
5372     for (i=0; i<nrecvs; i++) {
5373       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5374       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5375     }
5376 
5377     /* pack the outgoing message j-array */
5378     k = 0;
5379     for (i=0; i<nsends; i++) {
5380       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5381       bufJ  = bufj+sstartsj[i];
5382       for (j=0; j<nrows; j++) {
5383         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5384         for (ll=0; ll<sbs; ll++) {
5385           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5386           for (l=0; l<ncols; l++) {
5387             *bufJ++ = cols[l];
5388           }
5389           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5390         }
5391       }
5392       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5393     }
5394 
5395     /* recvs and sends of j-array are completed */
5396     i = nrecvs;
5397     while (i--) {
5398       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5399     }
5400     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5401   } else if (scall == MAT_REUSE_MATRIX) {
5402     sstartsj = *startsj_s;
5403     rstartsj = *startsj_r;
5404     bufa     = *bufa_ptr;
5405     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5406     b_otha   = b_oth->a;
5407   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value");
5408 
5409   /* a-array */
5410   /*---------*/
5411   /*  post receives of a-array */
5412   for (i=0; i<nrecvs; i++) {
5413     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5414     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5415   }
5416 
5417   /* pack the outgoing message a-array */
5418   k = 0;
5419   for (i=0; i<nsends; i++) {
5420     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5421     bufA  = bufa+sstartsj[i];
5422     for (j=0; j<nrows; j++) {
5423       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5424       for (ll=0; ll<sbs; ll++) {
5425         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5426         for (l=0; l<ncols; l++) {
5427           *bufA++ = vals[l];
5428         }
5429         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5430       }
5431     }
5432     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5433   }
5434   /* recvs and sends of a-array are completed */
5435   i = nrecvs;
5436   while (i--) {
5437     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5438   }
5439   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5440   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5441 
5442   if (scall == MAT_INITIAL_MATRIX) {
5443     /* put together the new matrix */
5444     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5445 
5446     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5447     /* Since these are PETSc arrays, change flags to free them as necessary. */
5448     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5449     b_oth->free_a  = PETSC_TRUE;
5450     b_oth->free_ij = PETSC_TRUE;
5451     b_oth->nonew   = 0;
5452 
5453     ierr = PetscFree(bufj);CHKERRQ(ierr);
5454     if (!startsj_s || !bufa_ptr) {
5455       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5456       ierr = PetscFree(bufa);CHKERRQ(ierr);
5457     } else {
5458       *startsj_s = sstartsj;
5459       *startsj_r = rstartsj;
5460       *bufa_ptr  = bufa;
5461     }
5462   }
5463   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5464   PetscFunctionReturn(0);
5465 }
5466 
5467 #undef __FUNCT__
5468 #define __FUNCT__ "MatGetCommunicationStructs"
5469 /*@C
5470   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5471 
5472   Not Collective
5473 
5474   Input Parameters:
5475 . A - The matrix in mpiaij format
5476 
5477   Output Parameter:
5478 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5479 . colmap - A map from global column index to local index into lvec
5480 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5481 
5482   Level: developer
5483 
5484 @*/
5485 #if defined(PETSC_USE_CTABLE)
5486 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5487 #else
5488 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5489 #endif
5490 {
5491   Mat_MPIAIJ *a;
5492 
5493   PetscFunctionBegin;
5494   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5495   PetscValidPointer(lvec, 2);
5496   PetscValidPointer(colmap, 3);
5497   PetscValidPointer(multScatter, 4);
5498   a = (Mat_MPIAIJ*) A->data;
5499   if (lvec) *lvec = a->lvec;
5500   if (colmap) *colmap = a->colmap;
5501   if (multScatter) *multScatter = a->Mvctx;
5502   PetscFunctionReturn(0);
5503 }
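
/*
   A minimal usage sketch for MatGetCommunicationStructs() (illustrative only; the variable
   names below are hypothetical).  The returned objects are owned by the matrix and must not
   be destroyed by the caller:

      Vec        lvec;
      VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
   #else
      PetscInt   *colmap;
   #endif

      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/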
5504 
5505 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5506 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5507 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5508 
5509 #undef __FUNCT__
5510 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5511 /*
5512     Computes C = A*B as (B'*A')', since computing the MPIDense*MPIAIJ product A*B directly is untenable
5513 
5514                n                       p                          p
5515         (              )       (              )         (                  )
5516       m (      A       )  *  n (       B      )   =   m (         C        )
5517         (              )       (              )         (                  )
5518 
5519 */
5520 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5521 {
5522   PetscErrorCode ierr;
5523   Mat            At,Bt,Ct;
5524 
5525   PetscFunctionBegin;
5526   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5527   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5528   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5529   ierr = MatDestroy(&At);CHKERRQ(ierr);
5530   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5531   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5532   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5533   PetscFunctionReturn(0);
5534 }
5535 
5536 #undef __FUNCT__
5537 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5538 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5539 {
5540   PetscErrorCode ierr;
5541   PetscInt       m=A->rmap->n,n=B->cmap->n;
5542   Mat            Cmat;
5543 
5544   PetscFunctionBegin;
5545   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5546   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5547   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5548   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5549   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5550   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5551   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5552   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5553 
5554   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5555 
5556   *C = Cmat;
5557   PetscFunctionReturn(0);
5558 }
5559 
5560 /* ----------------------------------------------------------------*/
5561 #undef __FUNCT__
5562 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5563 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5564 {
5565   PetscErrorCode ierr;
5566 
5567   PetscFunctionBegin;
5568   if (scall == MAT_INITIAL_MATRIX) {
5569     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5570   }
5571   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5572   PetscFunctionReturn(0);
5573 }
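
/*
   A minimal usage sketch (illustrative only): with A of type MATMPIDENSE and B of type
   MATMPIAIJ, a user-level MatMatMult() call reaches the routines above through the
   "MatMatMult_mpidense_mpiaij_C" function composed in MatCreate_MPIAIJ() below:

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ...
      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/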
5574 
5575 #if defined(PETSC_HAVE_MUMPS)
5576 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5577 #endif
5578 #if defined(PETSC_HAVE_PASTIX)
5579 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5580 #endif
5581 #if defined(PETSC_HAVE_SUPERLU_DIST)
5582 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5583 #endif
5584 #if defined(PETSC_HAVE_CLIQUE)
5585 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5586 #endif
5587 
5588 /*MC
5589    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5590 
5591    Options Database Keys:
5592 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5593 
5594   Level: beginner
5595 
5596 .seealso: MatCreateAIJ()
5597 M*/
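
/*
   A minimal creation sketch for a MATMPIAIJ matrix (illustrative only; M, N, d_nz and o_nz
   are hypothetical placeholders for the global sizes and the per-row preallocation counts):

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
      ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/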
5598 
5599 #undef __FUNCT__
5600 #define __FUNCT__ "MatCreate_MPIAIJ"
5601 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5602 {
5603   Mat_MPIAIJ     *b;
5604   PetscErrorCode ierr;
5605   PetscMPIInt    size;
5606 
5607   PetscFunctionBegin;
5608   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5609 
5610   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5611   B->data       = (void*)b;
5612   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5613   B->assembled  = PETSC_FALSE;
5614   B->insertmode = NOT_SET_VALUES;
5615   b->size       = size;
5616 
5617   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5618 
5619   /* build cache for off array entries formed */
5620   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5621 
5622   b->donotstash  = PETSC_FALSE;
5623   b->colmap      = 0;
5624   b->garray      = 0;
5625   b->roworiented = PETSC_TRUE;
5626 
5627   /* stuff used for matrix vector multiply */
5628   b->lvec  = NULL;
5629   b->Mvctx = NULL;
5630 
5631   /* stuff for MatGetRow() */
5632   b->rowindices   = 0;
5633   b->rowvalues    = 0;
5634   b->getrowactive = PETSC_FALSE;
5635 
5636   /* flexible pointer used in CUSP/CUSPARSE classes */
5637   b->spptr = NULL;
5638 
5639 #if defined(PETSC_HAVE_MUMPS)
5640   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C","MatGetFactor_aij_mumps",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5641 #endif
5642 #if defined(PETSC_HAVE_PASTIX)
5643   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C","MatGetFactor_mpiaij_pastix",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5644 #endif
5645 #if defined(PETSC_HAVE_SUPERLU_DIST)
5646   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C","MatGetFactor_mpiaij_superlu_dist",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5647 #endif
5648 #if defined(PETSC_HAVE_CLIQUE)
5649   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C","MatGetFactor_aij_clique",MatGetFactor_aij_clique);CHKERRQ(ierr);
5650 #endif
5651   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIAIJ",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5652   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIAIJ",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5653   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIAIJ",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5654   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_MPIAIJ",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5655   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C","MatMPIAIJSetPreallocation_MPIAIJ",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5656   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C","MatMPIAIJSetPreallocationCSR_MPIAIJ",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5657   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIAIJ",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5658   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C","MatConvert_MPIAIJ_MPIAIJPERM",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5659   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C","MatConvert_MPIAIJ_MPIAIJCRL",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5660   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C","MatConvert_MPIAIJ_MPISBAIJ",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5661   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C","MatMatMult_MPIDense_MPIAIJ",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5662   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C","MatMatMultSymbolic_MPIDense_MPIAIJ",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5663   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C","MatMatMultNumeric_MPIDense_MPIAIJ",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5664   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5665   PetscFunctionReturn(0);
5666 }
5667 
5668 #undef __FUNCT__
5669 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5670 /*@
5671      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5672          and "off-diagonal" part of the matrix in CSR format.
5673 
5674    Collective on MPI_Comm
5675 
5676    Input Parameters:
5677 +  comm - MPI communicator
5678 .  m - number of local rows (Cannot be PETSC_DECIDE)
5679 .  n - This value should be the same as the local size used in creating the
5680        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5681        calculated if N is given). For square matrices n is almost always m.
5682 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5683 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5684 .   i - row indices for "diagonal" portion of matrix
5685 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
5686 .   a - matrix values
5687 .   oi - row indices for "off-diagonal" portion of matrix
5688 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
5689 -   oa - matrix values
5690 
5691    Output Parameter:
5692 .   mat - the matrix
5693 
5694    Level: advanced
5695 
5696    Notes:
5697        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5698        must free the arrays once the matrix has been destroyed and not before.
5699 
5700        The i and j indices are 0 based
5701 
5702        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5703 
5704        This sets local rows and cannot be used to set off-processor values.
5705 
5706        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5707        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5708        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5709        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5710        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5711        communication if it is known that only local entries will be set.
5712 
5713 .keywords: matrix, aij, compressed row, sparse, parallel
5714 
5715 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5716           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5717 @*/
5718 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5719 {
5720   PetscErrorCode ierr;
5721   Mat_MPIAIJ     *maij;
5722 
5723   PetscFunctionBegin;
5724   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5725   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5726   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5727   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5728   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5729   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5730   maij = (Mat_MPIAIJ*) (*mat)->data;
5731 
5732   (*mat)->preallocated = PETSC_TRUE;
5733 
5734   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5735   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5736 
5737   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5738   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5739 
5740   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5741   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5742   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5743   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5744 
5745   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5746   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5747   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5748   PetscFunctionReturn(0);
5749 }
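
/*
   A tiny worked sketch for MatCreateMPIAIJWithSplitArrays() (illustrative only).  Assume two
   processes and the 2x2 matrix

         [ 1 2 ]      row 0 on process 0
         [ 3 4 ]      row 1 on process 1

   so each process owns one row and one column (m = n = 1, M = N = 2).  The "diagonal" block
   uses local column indices and the "off-diagonal" block uses global column indices:

      On process 0:
         PetscInt    i[]  = {0,1}, j[]  = {0};  PetscScalar a[]  = {1.0};
         PetscInt    oi[] = {0,1}, oj[] = {1};  PetscScalar oa[] = {2.0};
      On process 1:
         PetscInt    i[]  = {0,1}, j[]  = {0};  PetscScalar a[]  = {4.0};
         PetscInt    oi[] = {0,1}, oj[] = {0};  PetscScalar oa[] = {3.0};

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);

   The arrays are not copied, so they must remain valid until the matrix is destroyed.
*/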
5750 
5751 /*
5752     Special version for direct calls from Fortran
5753 */
5754 #include <petsc-private/fortranimpl.h>
5755 
5756 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5757 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5758 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5759 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5760 #endif
5761 
5762 /* Change these macros so they can be used in a void function */
5763 #undef CHKERRQ
5764 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5765 #undef SETERRQ2
5766 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5767 #undef SETERRQ3
5768 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5769 #undef SETERRQ
5770 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5771 
5772 #undef __FUNCT__
5773 #define __FUNCT__ "matsetvaluesmpiaij_"
5774 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5775 {
5776   Mat            mat  = *mmat;
5777   PetscInt       m    = *mm, n = *mn;
5778   InsertMode     addv = *maddv;
5779   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5780   PetscScalar    value;
5781   PetscErrorCode ierr;
5782 
5783   MatCheckPreallocated(mat,1);
5784   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5785 
5786 #if defined(PETSC_USE_DEBUG)
5787   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5788 #endif
5789   {
5790     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5791     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5792     PetscBool roworiented = aij->roworiented;
5793 
5794     /* Some Variables required in the macro */
5795     Mat        A                 = aij->A;
5796     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5797     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5798     MatScalar  *aa               = a->a;
5799     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5800     Mat        B                 = aij->B;
5801     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5802     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5803     MatScalar  *ba               = b->a;
5804 
5805     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5806     PetscInt  nonew = a->nonew;
5807     MatScalar *ap1,*ap2;
5808 
5809     PetscFunctionBegin;
5810     for (i=0; i<m; i++) {
5811       if (im[i] < 0) continue;
5812 #if defined(PETSC_USE_DEBUG)
5813       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5814 #endif
5815       if (im[i] >= rstart && im[i] < rend) {
5816         row      = im[i] - rstart;
5817         lastcol1 = -1;
5818         rp1      = aj + ai[row];
5819         ap1      = aa + ai[row];
5820         rmax1    = aimax[row];
5821         nrow1    = ailen[row];
5822         low1     = 0;
5823         high1    = nrow1;
5824         lastcol2 = -1;
5825         rp2      = bj + bi[row];
5826         ap2      = ba + bi[row];
5827         rmax2    = bimax[row];
5828         nrow2    = bilen[row];
5829         low2     = 0;
5830         high2    = nrow2;
5831 
5832         for (j=0; j<n; j++) {
5833           if (roworiented) value = v[i*n+j];
5834           else value = v[i+j*m];
5835           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5836           if (in[j] >= cstart && in[j] < cend) {
5837             col = in[j] - cstart;
5838             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5839           } else if (in[j] < 0) continue;
5840 #if defined(PETSC_USE_DEBUG)
5841           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5842 #endif
5843           else {
5844             if (mat->was_assembled) {
5845               if (!aij->colmap) {
5846                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5847               }
5848 #if defined(PETSC_USE_CTABLE)
5849               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5850               col--;
5851 #else
5852               col = aij->colmap[in[j]] - 1;
5853 #endif
5854               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5855                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5856                 col  =  in[j];
5857                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5858                 B     = aij->B;
5859                 b     = (Mat_SeqAIJ*)B->data;
5860                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5861                 rp2   = bj + bi[row];
5862                 ap2   = ba + bi[row];
5863                 rmax2 = bimax[row];
5864                 nrow2 = bilen[row];
5865                 low2  = 0;
5866                 high2 = nrow2;
5867                 bm    = aij->B->rmap->n;
5868                 ba    = b->a;
5869               }
5870             } else col = in[j];
5871             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5872           }
5873         }
5874       } else if (!aij->donotstash) {
5875         if (roworiented) {
5876           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5877         } else {
5878           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5879         }
5880       }
5881     }
5882   }
5883   PetscFunctionReturnVoid();
5884 }
5885 
5886