xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3bbf0e9209c918da710d8f50ca5c48af17a42e60)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13    for communicators controlling multiple processes.  It is recommended that you call both of
14    the above preallocation routines for simplicity; a usage sketch follows this manual page.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also automatically
20    switches over to using inodes when enough of them exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25 M*/
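/*
   A minimal usage sketch for MATAIJ (illustrative only; comm, M, N, nz, dnz and onz are assumed
   to be defined by the caller).  Calling both preallocation routines lets the same code run on
   one or many processes, since only the routine matching the actual matrix type has any effect.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,nz,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);CHKERRQ(ierr);
*/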
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
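/*
   A minimal sketch for selecting MATAIJCRL (illustrative): either set the type explicitly with

     ierr = MatSetType(A,MATAIJCRL);CHKERRQ(ierr);

   or create an ordinary AIJ matrix, call MatSetFromOptions(A), and run with -mat_type aijcrl.
*/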
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
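/*
   Builds an index set of the locally owned rows that contain at least one nonzero value;
   if no process owns an all-zero row, *keptrows is left NULL (meaning all rows are kept).
*/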
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
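/*
   Returns in *zrows the global indices of the locally owned rows whose diagonal entry
   (searched for in the diagonal block) is zero or not stored.
*/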
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
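/*
   Computes the requested norm of every (global) column: each process accumulates the
   contributions of its diagonal (A) and off-diagonal (B) blocks into a work array of
   global length, the arrays are combined with MPI_Allreduce (max for NORM_INFINITY,
   sum otherwise), and square roots are taken at the end for NORM_2.
*/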
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
138   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
139   if (type == NORM_2) {
140     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142     }
143     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145     }
146   } else if (type == NORM_1) {
147     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149     }
150     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152     }
153   } else if (type == NORM_INFINITY) {
154     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156     }
157     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159     }
160 
161   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162   if (type == NORM_INFINITY) {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
164   } else {
165     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
166   }
167   ierr = PetscFree(work);CHKERRQ(ierr);
168   if (type == NORM_2) {
169     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDistribute_MPIAIJ"
176 /*
177     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179 
180     Only for square matrices
181 
182     Used by a preconditioner, hence PETSC_EXTERN
183 */
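/*
   A minimal usage sketch (Adist and mlocal are illustrative names, not part of PETSc).
   Every process passes the number of rows it is to own in mlocal; the local sizes must
   sum to the global number of rows of gmat:

     Mat Adist;
     ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);

   gmat needs to be a valid MATSEQAIJ matrix only on process 0; it is not referenced on the
   other processes.  Calling again with MAT_REUSE_MATRIX moves new numerical values (with the
   same nonzero pattern) from process 0 into the already distributed matrix.
*/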
184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185 {
186   PetscMPIInt    rank,size;
187   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
188   PetscErrorCode ierr;
189   Mat            mat;
190   Mat_SeqAIJ     *gmata;
191   PetscMPIInt    tag;
192   MPI_Status     status;
193   PetscBool      aij;
194   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
195 
196   PetscFunctionBegin;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* count, for each row, the entries outside the diagonal block (olens) and those to its left (ld) */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* count, for each row, the entries outside the diagonal block (olens) and those to its left (ld) */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0*/
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /*
352   Local utility routine that creates a mapping from the global column
353 number to the local number in the off-diagonal part of the local
354 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
355 a slightly higher hash table cost; without it it is not scalable (each processor
356 has an order N integer array) but it is fast to access.
357 */
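/*
   For example, if the off-diagonal block references global columns garray = {3,9,14}, then
   (without PETSC_USE_CTABLE) colmap[3] = 1, colmap[9] = 2, colmap[14] = 3 and colmap[k] = 0
   for every other global column k; callers subtract one from the lookup, so -1 signals that
   the column is not present in the off-diagonal part.
*/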
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361 {
362   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       n = aij->B->cmap->n,i;
365 
366   PetscFunctionBegin;
367   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368 #if defined(PETSC_USE_CTABLE)
369   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
370   for (i=0; i<n; i++) {
371     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
372   }
373 #else
374   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
375   ierr = PetscLogObjectMemory((PetscObject)mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378 #endif
379   PetscFunctionReturn(0);
380 }
381 
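/* Inserts (or, for ADD_VALUES, adds) value at local position (row,col) of the diagonal block A:
   a short bisection followed by a linear scan locates col in the sorted row, existing entries
   are updated in place, zero values may be ignored, and genuinely new entries shift the rest of
   the row up after a possible reallocation.  The companion macro below does the same for the
   off-diagonal block B. */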
382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383 { \
384     if (col <= lastcol1)  low1 = 0;     \
385     else                 high1 = nrow1; \
386     lastcol1 = col;\
387     while (high1-low1 > 5) { \
388       t = (low1+high1)/2; \
389       if (rp1[t] > col) high1 = t; \
390       else              low1  = t; \
391     } \
392       for (_i=low1; _i<high1; _i++) { \
393         if (rp1[_i] > col) break; \
394         if (rp1[_i] == col) { \
395           if (addv == ADD_VALUES) ap1[_i] += value;   \
396           else                    ap1[_i] = value; \
397           goto a_noinsert; \
398         } \
399       }  \
400       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
402       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404       N = nrow1++ - 1; a->nz++; high1++; \
405       /* shift up all the later entries in this row */ \
406       for (ii=N; ii>=_i; ii--) { \
407         rp1[ii+1] = rp1[ii]; \
408         ap1[ii+1] = ap1[ii]; \
409       } \
410       rp1[_i] = col;  \
411       ap1[_i] = value;  \
412       a_noinsert: ; \
413       ailen[row] = nrow1; \
414 }
415 
416 
417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418   { \
419     if (col <= lastcol2) low2 = 0;                        \
420     else high2 = nrow2;                                   \
421     lastcol2 = col;                                       \
422     while (high2-low2 > 5) {                              \
423       t = (low2+high2)/2;                                 \
424       if (rp2[t] > col) high2 = t;                        \
425       else             low2  = t;                         \
426     }                                                     \
427     for (_i=low2; _i<high2; _i++) {                       \
428       if (rp2[_i] > col) break;                           \
429       if (rp2[_i] == col) {                               \
430         if (addv == ADD_VALUES) ap2[_i] += value;         \
431         else                    ap2[_i] = value;          \
432         goto b_noinsert;                                  \
433       }                                                   \
434     }                                                     \
435     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
437     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439     N = nrow2++ - 1; b->nz++; high2++;                    \
440     /* shift up all the later entries in this row */      \
441     for (ii=N; ii>=_i; ii--) {                            \
442       rp2[ii+1] = rp2[ii];                                \
443       ap2[ii+1] = ap2[ii];                                \
444     }                                                     \
445     rp2[_i] = col;                                        \
446     ap2[_i] = value;                                      \
447     b_noinsert: ;                                         \
448     bilen[row] = nrow2;                                   \
449   }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
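/*
   Replaces the stored values of one locally owned row; row is a global row index and v[] must
   hold the values of the entire row ordered by increasing global column index.  The values are
   split into the off-diagonal entries left of the diagonal block, the diagonal block itself,
   and the remaining off-diagonal entries.
*/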
453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454 {
455   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
456   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
457   PetscErrorCode ierr;
458   PetscInt       l,*garray = mat->garray,diag;
459 
460   PetscFunctionBegin;
461   /* code only works for square matrices A */
462 
463   /* find size of row to the left of the diagonal part */
464   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
465   row  = row - diag;
466   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467     if (garray[b->j[b->i[row]+l]] > diag) break;
468   }
469   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* diagonal part */
472   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
473 
474   /* right of diagonal part */
475   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatSetValues_MPIAIJ"
481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   PetscScalar    value;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
487   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488   PetscBool      roworiented = aij->roworiented;
489 
490   /* Some Variables required in the macro */
491   Mat        A                 = aij->A;
492   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
493   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494   MatScalar  *aa               = a->a;
495   PetscBool  ignorezeroentries = a->ignorezeroentries;
496   Mat        B                 = aij->B;
497   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
498   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499   MatScalar  *ba               = b->a;
500 
501   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502   PetscInt  nonew;
503   MatScalar *ap1,*ap2;
504 
505   PetscFunctionBegin;
506   if (v) PetscValidScalarPointer(v,6);
507   for (i=0; i<m; i++) {
508     if (im[i] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511 #endif
512     if (im[i] >= rstart && im[i] < rend) {
513       row      = im[i] - rstart;
514       lastcol1 = -1;
515       rp1      = aj + ai[row];
516       ap1      = aa + ai[row];
517       rmax1    = aimax[row];
518       nrow1    = ailen[row];
519       low1     = 0;
520       high1    = nrow1;
521       lastcol2 = -1;
522       rp2      = bj + bi[row];
523       ap2      = ba + bi[row];
524       rmax2    = bimax[row];
525       nrow2    = bilen[row];
526       low2     = 0;
527       high2    = nrow2;
528 
529       for (j=0; j<n; j++) {
530         if (v) {
531           if (roworiented) value = v[i*n+j];
532           else             value = v[i+j*m];
533         } else value = 0.0;
534         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535         if (in[j] >= cstart && in[j] < cend) {
536           col   = in[j] - cstart;
537           nonew = a->nonew;
538           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539         } else if (in[j] < 0) continue;
540 #if defined(PETSC_USE_DEBUG)
541         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542 #endif
543         else {
544           if (mat->was_assembled) {
545             if (!aij->colmap) {
546               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
547             }
548 #if defined(PETSC_USE_CTABLE)
549             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
550             col--;
551 #else
552             col = aij->colmap[in[j]] - 1;
553 #endif
554             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
556               col  =  in[j];
557               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558               B     = aij->B;
559               b     = (Mat_SeqAIJ*)B->data;
560               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561               rp2   = bj + bi[row];
562               ap2   = ba + bi[row];
563               rmax2 = bimax[row];
564               nrow2 = bilen[row];
565               low2  = 0;
566               high2 = nrow2;
567               bm    = aij->B->rmap->n;
568               ba    = b->a;
569             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570           } else col = in[j];
571           nonew = b->nonew;
572           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573         }
574       }
575     } else {
576       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577       if (!aij->donotstash) {
578         mat->assembled = PETSC_FALSE;
579         if (roworiented) {
580           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         } else {
582           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
583         }
584       }
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatGetValues_MPIAIJ"
592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593 {
594   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
595   PetscErrorCode ierr;
596   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
598 
599   PetscFunctionBegin;
600   for (i=0; i<m; i++) {
601     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603     if (idxm[i] >= rstart && idxm[i] < rend) {
604       row = idxm[i] - rstart;
605       for (j=0; j<n; j++) {
606         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608         if (idxn[j] >= cstart && idxn[j] < cend) {
609           col  = idxn[j] - cstart;
610           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611         } else {
612           if (!aij->colmap) {
613             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
614           }
615 #if defined(PETSC_USE_CTABLE)
616           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
617           col--;
618 #else
619           col = aij->colmap[idxn[j]] - 1;
620 #endif
621           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622           else {
623             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
624           }
625         }
626       }
627     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       nstash,reallocs;
641   InsertMode     addv;
642 
643   PetscFunctionBegin;
644   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
645 
646   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
647   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649   mat->insertmode = addv; /* in case this processor had no cache */
650 
651   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
652   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
653   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
663   PetscErrorCode ierr;
664   PetscMPIInt    n;
665   PetscInt       i,j,rstart,ncols,flg;
666   PetscInt       *row,*col;
667   PetscBool      other_disassembled;
668   PetscScalar    *val;
669   InsertMode     addv = mat->insertmode;
670 
671   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
672 
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n; ) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) {
682           if (row[j] != rstart) break;
683         }
684         if (j < n) ncols = j-i;
685         else       ncols = n-i;
686         /* Now assemble all these values with a single function call */
687         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
688 
689         i = j;
690       }
691     }
692     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
693   }
694   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
695   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
696 
697   /* determine if any processor has disassembled, if so we must
698      also disassemble ourselves, in order that we may reassemble. */
699   /*
700      if nonzero structure of submatrix B cannot change then we know that
701      no processor disassembled thus we can skip this stuff
702   */
703   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
705     if (mat->was_assembled && !other_disassembled) {
706       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
707     }
708   }
709   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
711   }
712   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
714   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
715 
716   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
717 
718   aij->rowvalues = 0;
719 
720   /* used by MatAXPY() */
721   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
722   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
723 
724   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
725   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
731 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
732 {
733   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
734   PetscErrorCode ierr;
735 
736   PetscFunctionBegin;
737   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
738   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatZeroRows_MPIAIJ"
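/*
   Zeroes the given global rows: each process first routes the row indices it was handed to the
   owning processes (the sends and receives below), then every process zeroes its local rows.
   The off-diagonal block is always zeroed; the diagonal block either gets diag placed on the
   diagonal or is zeroed as well, and when both x and b are provided the right-hand side is
   fixed up as b[row] = diag*x[row] for each zeroed row.
*/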
744 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
745 {
746   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
749   PetscInt          i,*owners = A->rmap->range;
750   PetscInt          *nprocs,j,idx,nsends,row;
751   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
752   PetscInt          *rvalues,count,base,slen,*source;
753   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
754   MPI_Comm          comm;
755   MPI_Request       *send_waits,*recv_waits;
756   MPI_Status        recv_status,*send_status;
757   const PetscScalar *xx;
758   PetscScalar       *bb;
759 #if defined(PETSC_DEBUG)
760   PetscBool found = PETSC_FALSE;
761 #endif
762 
763   PetscFunctionBegin;
764   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
765   /*  first count number of contributors to each processor */
766   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
767   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
768   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
769   j    = 0;
770   for (i=0; i<N; i++) {
771     if (lastidx > (idx = rows[i])) j = 0;
772     lastidx = idx;
773     for (; j<size; j++) {
774       if (idx >= owners[j] && idx < owners[j+1]) {
775         nprocs[2*j]++;
776         nprocs[2*j+1] = 1;
777         owner[i]      = j;
778 #if defined(PETSC_DEBUG)
779         found = PETSC_TRUE;
780 #endif
781         break;
782       }
783     }
784 #if defined(PETSC_DEBUG)
785     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
786     found = PETSC_FALSE;
787 #endif
788   }
789   nsends = 0;
790   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
791 
792   if (A->nooffproczerorows) {
793     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
794     nrecvs = nsends;
795     nmax   = N;
796   } else {
797     /* inform other processors of number of messages and max length*/
798     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
799   }
800 
801   /* post receives:   */
802   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
803   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
804   for (i=0; i<nrecvs; i++) {
805     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
806   }
807 
808   /* do sends:
809       1) starts[i] gives the starting index in svalues for stuff going to
810          the ith processor
811   */
812   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
813   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
814   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
815 
816   starts[0] = 0;
817   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
818   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
819 
820   starts[0] = 0;
821   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
822   count = 0;
823   for (i=0; i<size; i++) {
824     if (nprocs[2*i+1]) {
825       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
826     }
827   }
828   ierr = PetscFree(starts);CHKERRQ(ierr);
829 
830   base = owners[rank];
831 
832   /*  wait on receives */
833   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
834   count = nrecvs; slen = 0;
835   while (count) {
836     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
837     /* unpack receives into our local space */
838     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
839 
840     source[imdex] = recv_status.MPI_SOURCE;
841     lens[imdex]   = n;
842     slen         += n;
843     count--;
844   }
845   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
846 
847   /* move the data into the send scatter */
848   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
849   count = 0;
850   for (i=0; i<nrecvs; i++) {
851     values = rvalues + i*nmax;
852     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
853   }
854   ierr = PetscFree(rvalues);CHKERRQ(ierr);
855   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
856   ierr = PetscFree(owner);CHKERRQ(ierr);
857   ierr = PetscFree(nprocs);CHKERRQ(ierr);
858 
859   /* fix right hand side if needed */
860   if (x && b) {
861     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
862     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
863     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
864     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
865     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
866   }
867   /*
868         Zero the required rows. If the "diagonal block" of the matrix
869      is square and the user wishes to set the diagonal we use separate
870      code so that MatSetValues() is not called for each diagonal entry, which would
871      allocate new memory, resulting in many mallocs and slowing things down.
872 
873   */
874   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
875   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
876   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
877     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
878   } else if (diag != 0.0) {
879     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
880     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
881     for (i = 0; i < slen; i++) {
882       row  = lrows[i] + rstart;
883       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
884     }
885     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
886     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887   } else {
888     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
889   }
890   ierr = PetscFree(lrows);CHKERRQ(ierr);
891 
892   /* wait on sends */
893   if (nsends) {
894     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
895     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
896     ierr = PetscFree(send_status);CHKERRQ(ierr);
897   }
898   ierr = PetscFree(send_waits);CHKERRQ(ierr);
899   ierr = PetscFree(svalues);CHKERRQ(ierr);
900   PetscFunctionReturn(0);
901 }
902 
903 #undef __FUNCT__
904 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
905 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
906 {
907   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
908   PetscErrorCode    ierr;
909   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
910   PetscInt          i,*owners = A->rmap->range;
911   PetscInt          *nprocs,j,idx,nsends;
912   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
913   PetscInt          *rvalues,count,base,slen,*source;
914   PetscInt          *lens,*lrows,*values,m;
915   MPI_Comm          comm;
916   MPI_Request       *send_waits,*recv_waits;
917   MPI_Status        recv_status,*send_status;
918   const PetscScalar *xx;
919   PetscScalar       *bb,*mask;
920   Vec               xmask,lmask;
921   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
922   const PetscInt    *aj, *ii,*ridx;
923   PetscScalar       *aa;
924 #if defined(PETSC_DEBUG)
925   PetscBool found = PETSC_FALSE;
926 #endif
927 
928   PetscFunctionBegin;
929   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
930   /*  first count number of contributors to each processor */
931   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
932   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
933   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
934   j    = 0;
935   for (i=0; i<N; i++) {
936     if (lastidx > (idx = rows[i])) j = 0;
937     lastidx = idx;
938     for (; j<size; j++) {
939       if (idx >= owners[j] && idx < owners[j+1]) {
940         nprocs[2*j]++;
941         nprocs[2*j+1] = 1;
942         owner[i]      = j;
943 #if defined(PETSC_DEBUG)
944         found = PETSC_TRUE;
945 #endif
946         break;
947       }
948     }
949 #if defined(PETSC_DEBUG)
950     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
951     found = PETSC_FALSE;
952 #endif
953   }
954   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
955 
956   /* inform other processors of number of messages and max length*/
957   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
958 
959   /* post receives:   */
960   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
961   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
962   for (i=0; i<nrecvs; i++) {
963     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
964   }
965 
966   /* do sends:
967       1) starts[i] gives the starting index in svalues for stuff going to
968          the ith processor
969   */
970   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
971   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
972   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
973 
974   starts[0] = 0;
975   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
976   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
977 
978   starts[0] = 0;
979   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
980   count = 0;
981   for (i=0; i<size; i++) {
982     if (nprocs[2*i+1]) {
983       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
984     }
985   }
986   ierr = PetscFree(starts);CHKERRQ(ierr);
987 
988   base = owners[rank];
989 
990   /*  wait on receives */
991   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
992   count = nrecvs; slen = 0;
993   while (count) {
994     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
995     /* unpack receives into our local space */
996     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
997 
998     source[imdex] = recv_status.MPI_SOURCE;
999     lens[imdex]   = n;
1000     slen         += n;
1001     count--;
1002   }
1003   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1004 
1005   /* move the data into the send scatter */
1006   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1007   count = 0;
1008   for (i=0; i<nrecvs; i++) {
1009     values = rvalues + i*nmax;
1010     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1011   }
1012   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1013   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1014   ierr = PetscFree(owner);CHKERRQ(ierr);
1015   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1016   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1017 
1018   /* zero diagonal part of matrix */
1019   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1020 
1021   /* handle off diagonal part of matrix */
1022   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1023   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1024   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1025   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1026   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1027   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1030   if (x) {
1031     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1034     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1035   }
1036   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1037 
1038   /* remove zeroed rows of off diagonal matrix */
1039   ii = aij->i;
1040   for (i=0; i<slen; i++) {
1041     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1042   }
1043 
1044   /* loop over all elements of off process part of matrix zeroing removed columns*/
1045   if (aij->compressedrow.use) {
1046     m    = aij->compressedrow.nrows;
1047     ii   = aij->compressedrow.i;
1048     ridx = aij->compressedrow.rindex;
1049     for (i=0; i<m; i++) {
1050       n  = ii[i+1] - ii[i];
1051       aj = aij->j + ii[i];
1052       aa = aij->a + ii[i];
1053 
1054       for (j=0; j<n; j++) {
1055         if (PetscAbsScalar(mask[*aj])) {
1056           if (b) bb[*ridx] -= *aa*xx[*aj];
1057           *aa = 0.0;
1058         }
1059         aa++;
1060         aj++;
1061       }
1062       ridx++;
1063     }
1064   } else { /* do not use compressed row format */
1065     m = l->B->rmap->n;
1066     for (i=0; i<m; i++) {
1067       n  = ii[i+1] - ii[i];
1068       aj = aij->j + ii[i];
1069       aa = aij->a + ii[i];
1070       for (j=0; j<n; j++) {
1071         if (PetscAbsScalar(mask[*aj])) {
1072           if (b) bb[i] -= *aa*xx[*aj];
1073           *aa = 0.0;
1074         }
1075         aa++;
1076         aj++;
1077       }
1078     }
1079   }
1080   if (x) {
1081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1082     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1083   }
1084   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1085   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1086   ierr = PetscFree(lrows);CHKERRQ(ierr);
1087 
1088   /* wait on sends */
1089   if (nsends) {
1090     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1091     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1092     ierr = PetscFree(send_status);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1095   ierr = PetscFree(svalues);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatMult_MPIAIJ"
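/*
   y = A*x for the MPIAIJ format: start the scatter that gathers the needed ghost values of x,
   multiply by the diagonal block while the scatter proceeds, finish the scatter, then add the
   off-diagonal block applied to the gathered ghost values.
*/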
1101 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1102 {
1103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1104   PetscErrorCode ierr;
1105   PetscInt       nt;
1106 
1107   PetscFunctionBegin;
1108   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1109   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1110   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1111   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1112   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1113   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1119 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1120 {
1121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1122   PetscErrorCode ierr;
1123 
1124   PetscFunctionBegin;
1125   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1131 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1132 {
1133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1134   PetscErrorCode ierr;
1135 
1136   PetscFunctionBegin;
1137   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1138   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1139   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1140   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 #undef __FUNCT__
1145 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1146 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1147 {
1148   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1149   PetscErrorCode ierr;
1150   PetscBool      merged;
1151 
1152   PetscFunctionBegin;
1153   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1154   /* do nondiagonal part */
1155   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1156   if (!merged) {
1157     /* send it on its way */
1158     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159     /* do local part */
1160     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1161     /* receive remote parts: note this assumes the values are not actually */
1162     /* added in yy until the next line, */
1163     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1164   } else {
1165     /* do local part */
1166     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1167     /* send it on its way */
1168     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1169     /* the values were actually received in the Begin() but we still need to call this no-op */
1170     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1171   }
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1177 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1178 {
1179   MPI_Comm       comm;
1180   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1181   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1182   IS             Me,Notme;
1183   PetscErrorCode ierr;
1184   PetscInt       M,N,first,last,*notme,i;
1185   PetscMPIInt    size;
1186 
1187   PetscFunctionBegin;
1188   /* Easy test: symmetric diagonal block */
1189   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1190   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1191   if (!*f) PetscFunctionReturn(0);
1192   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1193   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1194   if (size == 1) PetscFunctionReturn(0);
1195 
1196   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1197   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1198   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1199   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1200   for (i=0; i<first; i++) notme[i] = i;
1201   for (i=last; i<M; i++) notme[i-last+first] = i;
1202   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1203   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1204   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1205   Aoff = Aoffs[0];
1206   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1207   Boff = Boffs[0];
1208   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1209   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1211   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1213   ierr = PetscFree(notme);CHKERRQ(ierr);
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1219 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1220 {
1221   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1222   PetscErrorCode ierr;
1223 
1224   PetscFunctionBegin;
1225   /* do nondiagonal part */
1226   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1227   /* send it on its way */
1228   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1229   /* do local part */
1230   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1231   /* receive remote parts */
1232   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 /*
1237   This only works correctly for square matrices where the subblock A->A is the
1238    diagonal block
1239 */
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1242 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1243 {
1244   PetscErrorCode ierr;
1245   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1246 
1247   PetscFunctionBegin;
1248   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1249   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1250   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatScale_MPIAIJ"
1256 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1257 {
1258   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1259   PetscErrorCode ierr;
1260 
1261   PetscFunctionBegin;
1262   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1263   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #undef __FUNCT__
1268 #define __FUNCT__ "MatDestroy_MPIAIJ"
1269 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1270 {
1271   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1272   PetscErrorCode ierr;
1273 
1274   PetscFunctionBegin;
1275 #if defined(PETSC_USE_LOG)
1276   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1277 #endif
1278   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1279   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1280   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1281   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1282 #if defined(PETSC_USE_CTABLE)
1283   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1284 #else
1285   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1286 #endif
1287   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1288   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1289   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1290   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1291   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1292   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1293 
1294   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1308 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1309 {
1310   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1311   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1312   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1313   PetscErrorCode ierr;
1314   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1315   int            fd;
1316   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1317   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1318   PetscScalar    *column_values;
1319   PetscInt       message_count,flowcontrolcount;
1320   FILE           *file;
1321 
1322   PetscFunctionBegin;
1323   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1324   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1325   nz   = A->nz + B->nz;
1326   if (!rank) {
1327     header[0] = MAT_FILE_CLASSID;
1328     header[1] = mat->rmap->N;
1329     header[2] = mat->cmap->N;
1330 
1331     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1332     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1333     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1334     /* get largest number of rows any processor has */
1335     rlen  = mat->rmap->n;
1336     range = mat->rmap->range;
1337     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1338   } else {
1339     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1340     rlen = mat->rmap->n;
1341   }
1342 
1343   /* load up the local row counts */
1344   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1345   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1346 
1347   /* store the row lengths to the file */
1348   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1349   if (!rank) {
1350     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1351     for (i=1; i<size; i++) {
1352       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1353       rlen = range[i+1] - range[i];
1354       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1355       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1356     }
1357     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1358   } else {
1359     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1360     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1361     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1362   }
1363   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1364 
1365   /* load up the local column indices */
1366   nzmax = nz; /* process 0 needs a buffer as large as the largest number of nonzeros on any process */
1367   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1368   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1369   cnt   = 0;
1370   for (i=0; i<mat->rmap->n; i++) {
1371     for (j=B->i[i]; j<B->i[i+1]; j++) {
1372       if ((col = garray[B->j[j]]) > cstart) break;
1373       column_indices[cnt++] = col;
1374     }
1375     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1376     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1377   }
1378   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1379 
1380   /* store the column indices to the file */
1381   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1382   if (!rank) {
1383     MPI_Status status;
1384     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1385     for (i=1; i<size; i++) {
1386       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1387       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1388       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1389       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1390       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1391     }
1392     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1393   } else {
1394     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1395     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1396     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1398   }
1399   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1400 
1401   /* load up the local column values */
1402   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1403   cnt  = 0;
1404   for (i=0; i<mat->rmap->n; i++) {
1405     for (j=B->i[i]; j<B->i[i+1]; j++) {
1406       if (garray[B->j[j]] > cstart) break;
1407       column_values[cnt++] = B->a[j];
1408     }
1409     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1410     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1411   }
1412   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1413 
1414   /* store the column values to the file */
1415   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1416   if (!rank) {
1417     MPI_Status status;
1418     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1419     for (i=1; i<size; i++) {
1420       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1421       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1422       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1423       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1424       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1425     }
1426     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1427   } else {
1428     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1429     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1430     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1431     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1432   }
1433   ierr = PetscFree(column_values);CHKERRQ(ierr);
1434 
1435   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1436   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1437   PetscFunctionReturn(0);
1438 }
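
/*
   Usage sketch (illustrative only; the file name and variable names are placeholders): the
   binary path above is normally reached through MatView() on a binary viewer, and the file
   written here can be read back with MatLoad().

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/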
1439 
1440 #include <petscdraw.h>
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1443 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1444 {
1445   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1446   PetscErrorCode    ierr;
1447   PetscMPIInt       rank = aij->rank,size = aij->size;
1448   PetscBool         isdraw,iascii,isbinary;
1449   PetscViewer       sviewer;
1450   PetscViewerFormat format;
1451 
1452   PetscFunctionBegin;
1453   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1456   if (iascii) {
1457     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1458     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1459       MatInfo   info;
1460       PetscBool inodes;
1461 
1462       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1463       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1464       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1465       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1466       if (!inodes) {
1467         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1468                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1469       } else {
1470         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1471                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1472       }
1473       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1474       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1475       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1476       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1477       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1478       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1480       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1481       PetscFunctionReturn(0);
1482     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1483       PetscInt inodecount,inodelimit,*inodes;
1484       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1485       if (inodes) {
1486         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1487       } else {
1488         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1489       }
1490       PetscFunctionReturn(0);
1491     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1492       PetscFunctionReturn(0);
1493     }
1494   } else if (isbinary) {
1495     if (size == 1) {
1496       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1497       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1498     } else {
1499       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1500     }
1501     PetscFunctionReturn(0);
1502   } else if (isdraw) {
1503     PetscDraw draw;
1504     PetscBool isnull;
1505     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1506     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1507   }
1508 
1509   if (size == 1) {
1510     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1511     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1512   } else {
1513     /* assemble the entire matrix onto first processor. */
1514     Mat        A;
1515     Mat_SeqAIJ *Aloc;
1516     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1517     MatScalar  *a;
1518 
1519     if (mat->rmap->N > 1024) {
1520       PetscBool flg = PETSC_FALSE;
1521 
1522       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1523       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1524     }
1525 
1526     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1527     if (!rank) {
1528       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1529     } else {
1530       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1531     }
1532     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1533     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1534     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1535     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1536     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1537 
1538     /* copy over the A part */
1539     Aloc = (Mat_SeqAIJ*)aij->A->data;
1540     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1541     row  = mat->rmap->rstart;
1542     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1543     for (i=0; i<m; i++) {
1544       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1545       row++;
1546       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1547     }
1548     aj = Aloc->j;
1549     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1550 
1551     /* copy over the B part */
1552     Aloc = (Mat_SeqAIJ*)aij->B->data;
1553     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1554     row  = mat->rmap->rstart;
1555     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1556     ct   = cols;
1557     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1558     for (i=0; i<m; i++) {
1559       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1560       row++;
1561       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1562     }
1563     ierr = PetscFree(ct);CHKERRQ(ierr);
1564     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1565     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     /*
1567        Everyone has to call to draw the matrix since the graphics waits are
1568        synchronized across all processors that share the PetscDraw object
1569     */
1570     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1571     if (!rank) {
1572       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1573       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1574       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1575       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1576     }
1577     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1578     ierr = MatDestroy(&A);CHKERRQ(ierr);
1579   }
1580   PetscFunctionReturn(0);
1581 }
1582 
1583 #undef __FUNCT__
1584 #define __FUNCT__ "MatView_MPIAIJ"
1585 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1586 {
1587   PetscErrorCode ierr;
1588   PetscBool      iascii,isdraw,issocket,isbinary;
1589 
1590   PetscFunctionBegin;
1591   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1595   if (iascii || isdraw || isbinary || issocket) {
1596     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1597   }
1598   PetscFunctionReturn(0);
1599 }
1600 
1601 #undef __FUNCT__
1602 #define __FUNCT__ "MatSOR_MPIAIJ"
1603 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1604 {
1605   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1606   PetscErrorCode ierr;
1607   Vec            bb1 = 0;
1608   PetscBool      hasop;
1609 
1610   PetscFunctionBegin;
1611   if (flag == SOR_APPLY_UPPER) {
1612     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1613     PetscFunctionReturn(0);
1614   }
1615 
1616   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1617     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1618   }
1619 
1620   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1621     if (flag & SOR_ZERO_INITIAL_GUESS) {
1622       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1623       its--;
1624     }
1625 
1626     while (its--) {
1627       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1628       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629 
1630       /* update rhs: bb1 = bb - B*x */
1631       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1632       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1633 
1634       /* local sweep */
1635       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1636     }
1637   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1638     if (flag & SOR_ZERO_INITIAL_GUESS) {
1639       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1640       its--;
1641     }
1642     while (its--) {
1643       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1644       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645 
1646       /* update rhs: bb1 = bb - B*x */
1647       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1648       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1649 
1650       /* local sweep */
1651       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1652     }
1653   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1654     if (flag & SOR_ZERO_INITIAL_GUESS) {
1655       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1656       its--;
1657     }
1658     while (its--) {
1659       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661 
1662       /* update rhs: bb1 = bb - B*x */
1663       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1664       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1665 
1666       /* local sweep */
1667       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1668     }
1669   } else if (flag & SOR_EISENSTAT) {
1670     Vec xx1;
1671 
1672     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1673     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1674 
1675     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1676     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     if (!mat->diag) {
1678       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1679       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1680     }
1681     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1682     if (hasop) {
1683       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1684     } else {
1685       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1686     }
1687     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1688 
1689     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1690 
1691     /* local sweep */
1692     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1693     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1694     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1695   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1696 
1697   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1698   PetscFunctionReturn(0);
1699 }
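
/*
   Usage sketch (illustrative only; A, b, and x are placeholders): the "local" sweeps above
   apply SOR to the diagonal block while the off-diagonal block is handled with the latest
   scattered values of x, i.e. a block-Jacobi outer iteration with SOR inside each block.
   A direct call might look like

      ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   although this is usually driven through PCSOR, e.g. -pc_type sor -pc_sor_local_symmetric.
*/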
1700 
1701 #undef __FUNCT__
1702 #define __FUNCT__ "MatPermute_MPIAIJ"
1703 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1704 {
1705   Mat            aA,aB,Aperm;
1706   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1707   PetscScalar    *aa,*ba;
1708   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1709   PetscSF        rowsf,sf;
1710   IS             parcolp = NULL;
1711   PetscBool      done;
1712   PetscErrorCode ierr;
1713 
1714   PetscFunctionBegin;
1715   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1716   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1717   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1718   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1719 
1720   /* Invert row permutation to find out where my rows should go */
1721   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1722   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1723   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1724   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1725   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1726   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1727 
1728   /* Invert column permutation to find out where my columns should go */
1729   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1730   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1731   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1732   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1733   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1734   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1735   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1736 
1737   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1738   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1739   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1740 
1741   /* Find out where my gcols should go */
1742   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1743   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1744   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1745   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1746   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1747   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1748   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1749   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1750 
1751   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1752   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1753   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1755   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1756   for (i=0; i<m; i++) {
1757     PetscInt row = rdest[i],rowner;
1758     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1759     for (j=ai[i]; j<ai[i+1]; j++) {
1760       PetscInt cowner,col = cdest[aj[j]];
1761       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1762       if (rowner == cowner) dnnz[i]++;
1763       else onnz[i]++;
1764     }
1765     for (j=bi[i]; j<bi[i+1]; j++) {
1766       PetscInt cowner,col = gcdest[bj[j]];
1767       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1768       if (rowner == cowner) dnnz[i]++;
1769       else onnz[i]++;
1770     }
1771   }
1772   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1773   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1774   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1775   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1778   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1779 
1780   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1781   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1782   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1783   for (i=0; i<m; i++) {
1784     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1785     PetscInt rowlen;
1786     rowlen = ai[i+1] - ai[i];
1787     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1788     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1789     rowlen = bi[i+1] - bi[i];
1790     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1791     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1792   }
1793   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1794   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1796   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1797   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1798   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1799   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1800   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1801   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1802   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1803   *B = Aperm;
1804   PetscFunctionReturn(0);
1805 }
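
/*
   Usage sketch (illustrative only; rowp and colp are index sets holding the desired global
   permutations, created e.g. with ISCreateGeneral()): the PetscSF-based code above inverts
   the permutations so that each process learns where its rows, columns, and ghost columns land.

      Mat Aperm;
      ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
      ... use Aperm ...
      ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/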
1806 
1807 #undef __FUNCT__
1808 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1809 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1810 {
1811   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1812   Mat            A    = mat->A,B = mat->B;
1813   PetscErrorCode ierr;
1814   PetscReal      isend[5],irecv[5];
1815 
1816   PetscFunctionBegin;
1817   info->block_size = 1.0;
1818   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1819 
1820   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1821   isend[3] = info->memory;  isend[4] = info->mallocs;
1822 
1823   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1824 
1825   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1826   isend[3] += info->memory;  isend[4] += info->mallocs;
1827   if (flag == MAT_LOCAL) {
1828     info->nz_used      = isend[0];
1829     info->nz_allocated = isend[1];
1830     info->nz_unneeded  = isend[2];
1831     info->memory       = isend[3];
1832     info->mallocs      = isend[4];
1833   } else if (flag == MAT_GLOBAL_MAX) {
1834     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1835 
1836     info->nz_used      = irecv[0];
1837     info->nz_allocated = irecv[1];
1838     info->nz_unneeded  = irecv[2];
1839     info->memory       = irecv[3];
1840     info->mallocs      = irecv[4];
1841   } else if (flag == MAT_GLOBAL_SUM) {
1842     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1843 
1844     info->nz_used      = irecv[0];
1845     info->nz_allocated = irecv[1];
1846     info->nz_unneeded  = irecv[2];
1847     info->memory       = irecv[3];
1848     info->mallocs      = irecv[4];
1849   }
1850   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1851   info->fill_ratio_needed = 0;
1852   info->factor_mallocs    = 0;
1853   PetscFunctionReturn(0);
1854 }
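
/*
   Usage sketch (illustrative only; A is a placeholder): querying global storage statistics;
   the local values from the diagonal and off-diagonal blocks are summed (or maxed) across
   the communicator as done above.

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/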
1855 
1856 #undef __FUNCT__
1857 #define __FUNCT__ "MatSetOption_MPIAIJ"
1858 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1859 {
1860   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1861   PetscErrorCode ierr;
1862 
1863   PetscFunctionBegin;
1864   switch (op) {
1865   case MAT_NEW_NONZERO_LOCATIONS:
1866   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1867   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1868   case MAT_KEEP_NONZERO_PATTERN:
1869   case MAT_NEW_NONZERO_LOCATION_ERR:
1870   case MAT_USE_INODES:
1871   case MAT_IGNORE_ZERO_ENTRIES:
1872     MatCheckPreallocated(A,1);
1873     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1874     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1875     break;
1876   case MAT_ROW_ORIENTED:
1877     a->roworiented = flg;
1878 
1879     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1880     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1881     break;
1882   case MAT_NEW_DIAGONALS:
1883     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1884     break;
1885   case MAT_IGNORE_OFF_PROC_ENTRIES:
1886     a->donotstash = flg;
1887     break;
1888   case MAT_SPD:
1889     A->spd_set = PETSC_TRUE;
1890     A->spd     = flg;
1891     if (flg) {
1892       A->symmetric                  = PETSC_TRUE;
1893       A->structurally_symmetric     = PETSC_TRUE;
1894       A->symmetric_set              = PETSC_TRUE;
1895       A->structurally_symmetric_set = PETSC_TRUE;
1896     }
1897     break;
1898   case MAT_SYMMETRIC:
1899     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1900     break;
1901   case MAT_STRUCTURALLY_SYMMETRIC:
1902     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1903     break;
1904   case MAT_HERMITIAN:
1905     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1906     break;
1907   case MAT_SYMMETRY_ETERNAL:
1908     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1909     break;
1910   default:
1911     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1912   }
1913   PetscFunctionReturn(0);
1914 }
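
/*
   Usage sketch (illustrative only; A is a placeholder): options are set on the parallel matrix
   and, where meaningful, forwarded to the sequential blocks as above.

      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
*/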
1915 
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatGetRow_MPIAIJ"
1918 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1919 {
1920   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1921   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1922   PetscErrorCode ierr;
1923   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1924   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1925   PetscInt       *cmap,*idx_p;
1926 
1927   PetscFunctionBegin;
1928   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1929   mat->getrowactive = PETSC_TRUE;
1930 
1931   if (!mat->rowvalues && (idx || v)) {
1932     /*
1933         allocate enough space to hold information from the longest row.
1934     */
1935     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1936     PetscInt   max = 1,tmp;
1937     for (i=0; i<matin->rmap->n; i++) {
1938       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1939       if (max < tmp) max = tmp;
1940     }
1941     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1942   }
1943 
1944   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1945   lrow = row - rstart;
1946 
1947   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1948   if (!v)   {pvA = 0; pvB = 0;}
1949   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1950   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1951   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1952   nztot = nzA + nzB;
1953 
1954   cmap = mat->garray;
1955   if (v  || idx) {
1956     if (nztot) {
1957       /* Sort by increasing column numbers, assuming A and B already sorted */
1958       PetscInt imark = -1;
1959       if (v) {
1960         *v = v_p = mat->rowvalues;
1961         for (i=0; i<nzB; i++) {
1962           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1963           else break;
1964         }
1965         imark = i;
1966         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1967         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1968       }
1969       if (idx) {
1970         *idx = idx_p = mat->rowindices;
1971         if (imark > -1) {
1972           for (i=0; i<imark; i++) {
1973             idx_p[i] = cmap[cworkB[i]];
1974           }
1975         } else {
1976           for (i=0; i<nzB; i++) {
1977             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1978             else break;
1979           }
1980           imark = i;
1981         }
1982         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1983         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1984       }
1985     } else {
1986       if (idx) *idx = 0;
1987       if (v)   *v   = 0;
1988     }
1989   }
1990   *nz  = nztot;
1991   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1992   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1993   PetscFunctionReturn(0);
1994 }
1995 
1996 #undef __FUNCT__
1997 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1998 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1999 {
2000   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2001 
2002   PetscFunctionBegin;
2003   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2004   aij->getrowactive = PETSC_FALSE;
2005   PetscFunctionReturn(0);
2006 }
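
/*
   Usage sketch (illustrative only; A is a placeholder): MatGetRow()/MatRestoreRow() on a
   parallel AIJ matrix is restricted to locally owned rows, and only one row may be held at
   a time (see the getrowactive guard above).

      PetscInt          rstart,rend,row,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ... inspect the row ...
        ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/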
2007 
2008 #undef __FUNCT__
2009 #define __FUNCT__ "MatNorm_MPIAIJ"
2010 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2011 {
2012   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2013   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2014   PetscErrorCode ierr;
2015   PetscInt       i,j,cstart = mat->cmap->rstart;
2016   PetscReal      sum = 0.0;
2017   MatScalar      *v;
2018 
2019   PetscFunctionBegin;
2020   if (aij->size == 1) {
2021     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2022   } else {
2023     if (type == NORM_FROBENIUS) {
2024       v = amat->a;
2025       for (i=0; i<amat->nz; i++) {
2026         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2027       }
2028       v = bmat->a;
2029       for (i=0; i<bmat->nz; i++) {
2030         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2031       }
2032       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2033       *norm = PetscSqrtReal(*norm);
2034     } else if (type == NORM_1) { /* max column norm */
2035       PetscReal *tmp,*tmp2;
2036       PetscInt  *jj,*garray = aij->garray;
2037       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2038       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2039       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2040       *norm = 0.0;
2041       v     = amat->a; jj = amat->j;
2042       for (j=0; j<amat->nz; j++) {
2043         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2044       }
2045       v = bmat->a; jj = bmat->j;
2046       for (j=0; j<bmat->nz; j++) {
2047         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2048       }
2049       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2050       for (j=0; j<mat->cmap->N; j++) {
2051         if (tmp2[j] > *norm) *norm = tmp2[j];
2052       }
2053       ierr = PetscFree(tmp);CHKERRQ(ierr);
2054       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2055     } else if (type == NORM_INFINITY) { /* max row norm */
2056       PetscReal ntemp = 0.0;
2057       for (j=0; j<aij->A->rmap->n; j++) {
2058         v   = amat->a + amat->i[j];
2059         sum = 0.0;
2060         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2061           sum += PetscAbsScalar(*v); v++;
2062         }
2063         v = bmat->a + bmat->i[j];
2064         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2065           sum += PetscAbsScalar(*v); v++;
2066         }
2067         if (sum > ntemp) ntemp = sum;
2068       }
2069       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2070     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2071   }
2072   PetscFunctionReturn(0);
2073 }
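
/*
   Usage sketch (illustrative only; A is a placeholder): the Frobenius branch above computes
   sqrt(sum_ij |a_ij|^2) by summing the local squares of the A and B blocks and reducing;
   NORM_1 is the maximum column sum and NORM_INFINITY the maximum row sum.

      PetscReal nrm;
      ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/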
2074 
2075 #undef __FUNCT__
2076 #define __FUNCT__ "MatTranspose_MPIAIJ"
2077 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2078 {
2079   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2080   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2081   PetscErrorCode ierr;
2082   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2083   PetscInt       cstart = A->cmap->rstart,ncol;
2084   Mat            B;
2085   MatScalar      *array;
2086 
2087   PetscFunctionBegin;
2088   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2089 
2090   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2091   ai = Aloc->i; aj = Aloc->j;
2092   bi = Bloc->i; bj = Bloc->j;
2093   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2094     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2095     PetscSFNode          *oloc;
2096     PETSC_UNUSED PetscSF sf;
2097 
2098     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2099     /* compute d_nnz for preallocation */
2100     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2101     for (i=0; i<ai[ma]; i++) {
2102       d_nnz[aj[i]]++;
2103       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2104     }
2105     /* compute local off-diagonal contributions */
2106     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2107     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2108     /* map those to global */
2109     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2110     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2111     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2112     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2113     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2114     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2115     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2116 
2117     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2118     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2119     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2120     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2121     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2122     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2123   } else {
2124     B    = *matout;
2125     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2126     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2127   }
2128 
2129   /* copy over the A part */
2130   array = Aloc->a;
2131   row   = A->rmap->rstart;
2132   for (i=0; i<ma; i++) {
2133     ncol = ai[i+1]-ai[i];
2134     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2135     row++;
2136     array += ncol; aj += ncol;
2137   }
2138   aj = Aloc->j;
2139   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2140 
2141   /* copy over the B part */
2142   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2143   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2144   array = Bloc->a;
2145   row   = A->rmap->rstart;
2146   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2147   cols_tmp = cols;
2148   for (i=0; i<mb; i++) {
2149     ncol = bi[i+1]-bi[i];
2150     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2151     row++;
2152     array += ncol; cols_tmp += ncol;
2153   }
2154   ierr = PetscFree(cols);CHKERRQ(ierr);
2155 
2156   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2157   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2159     *matout = B;
2160   } else {
2161     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2162   }
2163   PetscFunctionReturn(0);
2164 }
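
/*
   Usage sketch (illustrative only; A and At are placeholders): out-of-place and in-place
   transposes; the in-place form is only allowed for square matrices, as checked above.

      Mat At;
      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
      ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/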
2165 
2166 #undef __FUNCT__
2167 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2168 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2169 {
2170   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2171   Mat            a    = aij->A,b = aij->B;
2172   PetscErrorCode ierr;
2173   PetscInt       s1,s2,s3;
2174 
2175   PetscFunctionBegin;
2176   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2177   if (rr) {
2178     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2179     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2180     /* Overlap communication with computation. */
2181     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2182   }
2183   if (ll) {
2184     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2185     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2186     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2187   }
2188   /* scale the diagonal block */
2189   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2190 
2191   if (rr) {
2192     /* Do a scatter end and then right scale the off-diagonal block */
2193     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2194     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2195   }
2196   PetscFunctionReturn(0);
2197 }
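
/*
   Usage sketch (illustrative only; l and r are vectors conforming to the row and column
   layouts of A): computes A = diag(l)*A*diag(r), overlapping the scatter of r to the ghost
   entries with the left scaling, as done above.

      ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/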
2198 
2199 #undef __FUNCT__
2200 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2201 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2202 {
2203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2204   PetscErrorCode ierr;
2205 
2206   PetscFunctionBegin;
2207   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2208   PetscFunctionReturn(0);
2209 }
2210 
2211 #undef __FUNCT__
2212 #define __FUNCT__ "MatEqual_MPIAIJ"
2213 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2214 {
2215   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2216   Mat            a,b,c,d;
2217   PetscBool      flg;
2218   PetscErrorCode ierr;
2219 
2220   PetscFunctionBegin;
2221   a = matA->A; b = matA->B;
2222   c = matB->A; d = matB->B;
2223 
2224   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2225   if (flg) {
2226     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2227   }
2228   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2229   PetscFunctionReturn(0);
2230 }
2231 
2232 #undef __FUNCT__
2233 #define __FUNCT__ "MatCopy_MPIAIJ"
2234 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2235 {
2236   PetscErrorCode ierr;
2237   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2238   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2239 
2240   PetscFunctionBegin;
2241   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2242   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2243     /* because of the column compression in the off-process part of the matrix a->B,
2244        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2245        directly on the two parts. If needed, a copy more efficient than MatCopy_Basic()
2246        could be provided by first uncompressing the a->B matrices and then copying
2247        the submatrices */
2248     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2249   } else {
2250     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2251     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2252   }
2253   PetscFunctionReturn(0);
2254 }
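
/*
   Usage sketch (illustrative only; A and B are placeholders): copying values between two
   matrices that were preallocated with the same nonzero pattern takes the fast blockwise
   path above.

      ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/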
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatSetUp_MPIAIJ"
2258 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2259 {
2260   PetscErrorCode ierr;
2261 
2262   PetscFunctionBegin;
2263   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2264   PetscFunctionReturn(0);
2265 }
2266 
2267 #undef __FUNCT__
2268 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2269 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2270 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2271 {
2272   PetscInt       i,m=Y->rmap->N;
2273   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2274   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2275   const PetscInt *xi = x->i,*yi = y->i;
2276 
2277   PetscFunctionBegin;
2278   /* Set the number of nonzeros in the new matrix */
2279   for (i=0; i<m; i++) {
2280     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2281     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2282     nnz[i] = 0;
2283     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2284       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2285       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2286       nnz[i]++;
2287     }
2288     for (; k<nzy; k++) nnz[i]++;
2289   }
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 #undef __FUNCT__
2294 #define __FUNCT__ "MatAXPY_MPIAIJ"
2295 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2296 {
2297   PetscErrorCode ierr;
2298   PetscInt       i;
2299   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2300   PetscBLASInt   bnz,one=1;
2301   Mat_SeqAIJ     *x,*y;
2302 
2303   PetscFunctionBegin;
2304   if (str == SAME_NONZERO_PATTERN) {
2305     PetscScalar alpha = a;
2306     x    = (Mat_SeqAIJ*)xx->A->data;
2307     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2308     y    = (Mat_SeqAIJ*)yy->A->data;
2309     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2310     x    = (Mat_SeqAIJ*)xx->B->data;
2311     y    = (Mat_SeqAIJ*)yy->B->data;
2312     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2313     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2314   } else if (str == SUBSET_NONZERO_PATTERN) {
2315     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2316 
2317     x = (Mat_SeqAIJ*)xx->B->data;
2318     y = (Mat_SeqAIJ*)yy->B->data;
2319     if (y->xtoy && y->XtoY != xx->B) {
2320       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2321       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2322     }
2323     if (!y->xtoy) { /* get xtoy */
2324       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2325       y->XtoY = xx->B;
2326       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2327     }
2328     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2329   } else {
2330     Mat      B;
2331     PetscInt *nnz_d,*nnz_o;
2332     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2333     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2334     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2335     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2336     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2337     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2338     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2339     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2340     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2341     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2342     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2343     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2344     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2345     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2346   }
2347   PetscFunctionReturn(0);
2348 }
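
/*
   Usage sketch (illustrative only; Y and X are placeholders): Y = Y + a*X.
   SAME_NONZERO_PATTERN uses the BLAS axpy on the stored values, SUBSET_NONZERO_PATTERN maps
   X's nonzeros into Y, and DIFFERENT_NONZERO_PATTERN builds a new matrix with merged
   preallocation as above.

      ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/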
2349 
2350 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2351 
2352 #undef __FUNCT__
2353 #define __FUNCT__ "MatConjugate_MPIAIJ"
2354 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2355 {
2356 #if defined(PETSC_USE_COMPLEX)
2357   PetscErrorCode ierr;
2358   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2359 
2360   PetscFunctionBegin;
2361   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2362   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2363 #else
2364   PetscFunctionBegin;
2365 #endif
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatRealPart_MPIAIJ"
2371 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2372 {
2373   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2374   PetscErrorCode ierr;
2375 
2376   PetscFunctionBegin;
2377   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2378   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2379   PetscFunctionReturn(0);
2380 }
2381 
2382 #undef __FUNCT__
2383 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2384 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2385 {
2386   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2387   PetscErrorCode ierr;
2388 
2389   PetscFunctionBegin;
2390   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2391   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 #if defined(PETSC_HAVE_PBGL)
2396 
2397 #include <boost/parallel/mpi/bsp_process_group.hpp>
2398 #include <boost/graph/distributed/ilu_default_graph.hpp>
2399 #include <boost/graph/distributed/ilu_0_block.hpp>
2400 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2401 #include <boost/graph/distributed/petsc/interface.hpp>
2402 #include <boost/multi_array.hpp>
2403 #include <boost/parallel/distributed_property_map.hpp>
2404 
2405 #undef __FUNCT__
2406 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2407 /*
2408   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2409 */
2410 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2411 {
2412   namespace petsc = boost::distributed::petsc;
2413 
2414   namespace graph_dist = boost::graph::distributed;
2415   using boost::graph::distributed::ilu_default::process_group_type;
2416   using boost::graph::ilu_permuted;
2417 
2418   PetscBool      row_identity, col_identity;
2419   PetscContainer c;
2420   PetscInt       m, n, M, N;
2421   PetscErrorCode ierr;
2422 
2423   PetscFunctionBegin;
2424   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2425   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2426   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2427   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2428 
2429   process_group_type pg;
2430   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2431   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2432   lgraph_type& level_graph = *lgraph_p;
2433   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2434 
2435   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2436   ilu_permuted(level_graph);
2437 
2438   /* put together the new matrix */
2439   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2440   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2441   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2442   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2443   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2444   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2445   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2446   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447 
2448   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2449   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2450   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2451   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 #undef __FUNCT__
2456 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2457 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2458 {
2459   PetscFunctionBegin;
2460   PetscFunctionReturn(0);
2461 }
2462 
2463 #undef __FUNCT__
2464 #define __FUNCT__ "MatSolve_MPIAIJ"
2465 /*
2466   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2467 */
2468 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2469 {
2470   namespace graph_dist = boost::graph::distributed;
2471 
2472   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2473   lgraph_type    *lgraph_p;
2474   PetscContainer c;
2475   PetscErrorCode ierr;
2476 
2477   PetscFunctionBegin;
2478   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2479   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2480   ierr = VecCopy(b, x);CHKERRQ(ierr);
2481 
2482   PetscScalar *array_x;
2483   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2484   PetscInt sx;
2485   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2486 
2487   PetscScalar *array_b;
2488   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2489   PetscInt sb;
2490   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2491 
2492   lgraph_type& level_graph = *lgraph_p;
2493   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2494 
2495   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2496   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2497   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2498 
2499   typedef boost::iterator_property_map<array_ref_type::iterator,
2500                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2501   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2502   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2503 
2504   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2505   PetscFunctionReturn(0);
2506 }
2507 #endif
2508 
2509 #undef __FUNCT__
2510 #define __FUNCT__ "MatDestroy_MatRedundant"
2511 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2512 {
2513   PetscErrorCode ierr;
2514   Mat_Redundant  *redund;
2515   PetscInt       i;
2516   PetscMPIInt    size;
2517 
2518   PetscFunctionBegin;
2519   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2520   if (size == 1) {
2521     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2522     redund = a->redundant;
2523   } else {
2524     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2525     redund = a->redundant;
2526   }
2527   if (redund) {
2528     if (redund->matseq) { /* via MatGetSubMatrices()  */
2529       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2530       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2531       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2532       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2533     } else {
2534       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2535       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2536       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2537       for (i=0; i<redund->nrecvs; i++) {
2538         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2539         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2540       }
2541       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2542     }
2543 
2544     if (redund->psubcomm) {
2545       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2546     }
2547     ierr = redund->Destroy(A);CHKERRQ(ierr);
2548     ierr = PetscFree(redund);CHKERRQ(ierr);
2549   }
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 #undef __FUNCT__
2554 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2555 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2556 {
2557   PetscMPIInt    rank,size;
2558   MPI_Comm       comm;
2559   PetscErrorCode ierr;
2560   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2561   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2562   PetscInt       *rowrange = mat->rmap->range;
2563   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2564   Mat            A = aij->A,B=aij->B,C=*matredundant;
2565   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2566   PetscScalar    *sbuf_a;
2567   PetscInt       nzlocal=a->nz+b->nz;
2568   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2569   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2570   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2571   MatScalar      *aworkA,*aworkB;
2572   PetscScalar    *vals;
2573   PetscMPIInt    tag1,tag2,tag3,imdex;
2574   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2575   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2576   MPI_Status     recv_status,*send_status;
2577   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2578   PetscInt       **rbuf_j=NULL;
2579   PetscScalar    **rbuf_a=NULL;
2580   Mat_Redundant  *redund =NULL;
2581 
2582   PetscFunctionBegin;
2583   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2584   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2585   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2586   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2587   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2588 
2589   if (reuse == MAT_REUSE_MATRIX) {
2590     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2591     if (subsize == 1) {
2592       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2593       redund = c->redundant;
2594     } else {
2595       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2596       redund = c->redundant;
2597     }
2598     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2599 
2600     nsends    = redund->nsends;
2601     nrecvs    = redund->nrecvs;
2602     send_rank = redund->send_rank;
2603     recv_rank = redund->recv_rank;
2604     sbuf_nz   = redund->sbuf_nz;
2605     rbuf_nz   = redund->rbuf_nz;
2606     sbuf_j    = redund->sbuf_j;
2607     sbuf_a    = redund->sbuf_a;
2608     rbuf_j    = redund->rbuf_j;
2609     rbuf_a    = redund->rbuf_a;
2610   }
2611 
2612   if (reuse == MAT_INITIAL_MATRIX) {
2613     PetscInt    nleftover,np_subcomm;
2614 
2615     /* determine the destination ranks send_rank and the message counts nsends and nrecvs */
2616     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2617 
2618     np_subcomm = size/nsubcomm;
2619     nleftover  = size - nsubcomm*np_subcomm;
2620 
2621     /* the block of code below is specific to PETSC_SUBCOMM_INTERLACED */
2622     /* ------------------------------------------------*/
2623     nsends = 0; nrecvs = 0;
2624     for (i=0; i<size; i++) {
2625       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2626         send_rank[nsends++] = i;
2627         recv_rank[nrecvs++] = i;
2628       }
2629     }
2630     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2631       i = size-nleftover-1;
2632       j = 0;
2633       while (j < nsubcomm - nleftover) {
2634         send_rank[nsends++] = i;
2635         i--; j++;
2636       }
2637     }
2638 
2639     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2640       for (i=0; i<nleftover; i++) {
2641         recv_rank[nrecvs++] = size-nleftover+i;
2642       }
2643     }
2644     /*----------------------------------------------*/
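         /* worked example of the interlaced pattern above (illustrative only):
            with size=5 and nsubcomm=2, np_subcomm=2 and nleftover=1, so the
            interlaced subcomms are {0,2,4} and {1,3}; ranks 0 and 1 exchange
            rows (both have subrank 0), ranks 2 and 3 exchange rows (subrank 1),
            and leftover rank 4 sends its rows to rank 3, the last process of
            the full-sized subcomm, so every subcomm ends up with all rows */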
2645 
2646     /* allocate sbuf_j, sbuf_a */
2647     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2648     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2649     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2650     /*
2651     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2652     ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
2653      */
2654   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2655 
2656   /* copy mat's local entries into the buffers */
2657   if (reuse == MAT_INITIAL_MATRIX) {
2658     rownz_max = 0;
2659     rptr      = sbuf_j;
2660     cols      = sbuf_j + rend-rstart + 1;
2661     vals      = sbuf_a;
2662     rptr[0]   = 0;
2663     for (i=0; i<rend-rstart; i++) {
2664       row    = i + rstart;
2665       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2666       ncols  = nzA + nzB;
2667       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2668       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2669       /* load the column indices for this row into cols */
2670       lwrite = 0;
2671       for (l=0; l<nzB; l++) {
2672         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2673           vals[lwrite]   = aworkB[l];
2674           cols[lwrite++] = ctmp;
2675         }
2676       }
2677       for (l=0; l<nzA; l++) {
2678         vals[lwrite]   = aworkA[l];
2679         cols[lwrite++] = cstart + cworkA[l];
2680       }
2681       for (l=0; l<nzB; l++) {
2682         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2683           vals[lwrite]   = aworkB[l];
2684           cols[lwrite++] = ctmp;
2685         }
2686       }
2687       vals     += ncols;
2688       cols     += ncols;
2689       rptr[i+1] = rptr[i] + ncols;
2690       if (rownz_max < ncols) rownz_max = ncols;
2691     }
2692     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2693   } else { /* only copy matrix values into sbuf_a */
2694     rptr    = sbuf_j;
2695     vals    = sbuf_a;
2696     rptr[0] = 0;
2697     for (i=0; i<rend-rstart; i++) {
2698       row    = i + rstart;
2699       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2700       ncols  = nzA + nzB;
2701       cworkB = b->j + b->i[i];
2702       aworkA = a->a + a->i[i];
2703       aworkB = b->a + b->i[i];
2704       lwrite = 0;
2705       for (l=0; l<nzB; l++) {
2706         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2707       }
2708       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2709       for (l=0; l<nzB; l++) {
2710         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2711       }
2712       vals     += ncols;
2713       rptr[i+1] = rptr[i] + ncols;
2714     }
2715   } /* end of if (reuse == MAT_INITIAL_MATRIX) ... else */
2716 
2717   /* send nzlocal to others, and recv other's nzlocal */
2718   /*--------------------------------------------------*/
2719   if (reuse == MAT_INITIAL_MATRIX) {
2720     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2721 
2722     s_waits2 = s_waits3 + nsends;
2723     s_waits1 = s_waits2 + nsends;
2724     r_waits1 = s_waits1 + nsends;
2725     r_waits2 = r_waits1 + nrecvs;
2726     r_waits3 = r_waits2 + nrecvs;
2727   } else {
2728     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2729 
2730     r_waits3 = s_waits3 + nsends;
2731   }
2732 
2733   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2734   if (reuse == MAT_INITIAL_MATRIX) {
2735     /* get new tags to keep the communication clean */
2736     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2737     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2738     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2739 
2740     /* post receives of other's nzlocal */
2741     for (i=0; i<nrecvs; i++) {
2742       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2743     }
2744     /* send nzlocal to others */
2745     for (i=0; i<nsends; i++) {
2746       sbuf_nz[i] = nzlocal;
2747       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2748     }
2749     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2750     count = nrecvs;
2751     while (count) {
2752       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2753 
2754       recv_rank[imdex] = recv_status.MPI_SOURCE;
2755       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2756       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2757 
2758       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2759 
2760       rbuf_nz[imdex] += i + 2;
2761 
2762       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2763       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2764       count--;
2765     }
2766     /* wait on sends of nzlocal */
2767     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2768     /* send mat->i,j to others, and recv mat->i,j from others */
2769     /*------------------------------------------------*/
2770     for (i=0; i<nsends; i++) {
2771       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2772       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2773     }
2774     /* wait on receives of mat->i,j */
2775     /*------------------------------*/
2776     count = nrecvs;
2777     while (count) {
2778       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2779       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2780       count--;
2781     }
2782     /* wait on sends of mat->i,j */
2783     /*---------------------------*/
2784     if (nsends) {
2785       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2786     }
2787   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2788 
2789   /* post receives, send and receive mat->a */
2790   /*----------------------------------------*/
2791   for (imdex=0; imdex<nrecvs; imdex++) {
2792     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2793   }
2794   for (i=0; i<nsends; i++) {
2795     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2796   }
2797   count = nrecvs;
2798   while (count) {
2799     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2800     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2801     count--;
2802   }
2803   if (nsends) {
2804     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2805   }
2806 
2807   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2808 
2809   /* create redundant matrix */
2810   /*-------------------------*/
2811   if (reuse == MAT_INITIAL_MATRIX) {
2812     const PetscInt *range;
2813     PetscInt       rstart_sub,rend_sub,mloc_sub;
2814 
2815     /* compute rownz_max for preallocation */
2816     for (imdex=0; imdex<nrecvs; imdex++) {
2817       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2818       rptr = rbuf_j[imdex];
2819       for (i=0; i<j; i++) {
2820         ncols = rptr[i+1] - rptr[i];
2821         if (rownz_max < ncols) rownz_max = ncols;
2822       }
2823     }
2824 
2825     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2826 
2827     /* get the local size of the redundant matrix
2828        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types too, but may not be efficient! */
2829     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2830     rstart_sub = range[nsubcomm*subrank];
2831     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2832       rend_sub = range[nsubcomm*(subrank+1)];
2833     } else {
2834       rend_sub = mat->rmap->N;
2835     }
2836     mloc_sub = rend_sub - rstart_sub;
2837 
2838     if (M == N) {
2839       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2840     } else { /* non-square matrix */
2841       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2842     }
2843     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2844     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2845     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2846     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2847   } else {
2848     C = *matredundant;
2849   }
2850 
2851   /* insert local matrix entries */
2852   rptr = sbuf_j;
2853   cols = sbuf_j + rend-rstart + 1;
2854   vals = sbuf_a;
2855   for (i=0; i<rend-rstart; i++) {
2856     row   = i + rstart;
2857     ncols = rptr[i+1] - rptr[i];
2858     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2859     vals += ncols;
2860     cols += ncols;
2861   }
2862   /* insert received matrix entries */
2863   for (imdex=0; imdex<nrecvs; imdex++) {
2864     rstart = rowrange[recv_rank[imdex]];
2865     rend   = rowrange[recv_rank[imdex]+1];
2866     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2867     rptr   = rbuf_j[imdex];
2868     cols   = rbuf_j[imdex] + rend-rstart + 1;
2869     vals   = rbuf_a[imdex];
2870     for (i=0; i<rend-rstart; i++) {
2871       row   = i + rstart;
2872       ncols = rptr[i+1] - rptr[i];
2873       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2874       vals += ncols;
2875       cols += ncols;
2876     }
2877   }
2878   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2879   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2880 
2881   if (reuse == MAT_INITIAL_MATRIX) {
2882     *matredundant = C;
2883 
2884     /* create a supporting struct and attach it to C for reuse */
2885     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2886     if (subsize == 1) {
2887       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2888       c->redundant = redund;
2889     } else {
2890       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2891       c->redundant = redund;
2892     }
2893 
2894     redund->nzlocal   = nzlocal;
2895     redund->nsends    = nsends;
2896     redund->nrecvs    = nrecvs;
2897     redund->send_rank = send_rank;
2898     redund->recv_rank = recv_rank;
2899     redund->sbuf_nz   = sbuf_nz;
2900     redund->rbuf_nz   = rbuf_nz;
2901     redund->sbuf_j    = sbuf_j;
2902     redund->sbuf_a    = sbuf_a;
2903     redund->rbuf_j    = rbuf_j;
2904     redund->rbuf_a    = rbuf_a;
2905     redund->psubcomm  = NULL;
2906 
2907     redund->Destroy = C->ops->destroy;
2908     C->ops->destroy = MatDestroy_MatRedundant;
2909   }
2910   PetscFunctionReturn(0);
2911 }
2912 
2913 #undef __FUNCT__
2914 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2915 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2916 {
2917   PetscErrorCode ierr;
2918   MPI_Comm       comm;
2919   PetscMPIInt    size,subsize;
2920   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2921   Mat_Redundant  *redund=NULL;
2922   PetscSubcomm   psubcomm=NULL;
2923   MPI_Comm       subcomm_in=subcomm;
2924   Mat            *matseq;
2925   IS             isrow,iscol;
2926 
2927   PetscFunctionBegin;
2928   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2929     if (reuse ==  MAT_INITIAL_MATRIX) {
2930       /* create psubcomm, then get subcomm */
2931       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2932       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2933       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2934 
2935       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2936       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2937       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2938       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2939       subcomm = psubcomm->comm;
2940     } else { /* retrieve psubcomm and subcomm */
2941       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2942       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2943       if (subsize == 1) {
2944         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2945         redund = c->redundant;
2946       } else {
2947         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2948         redund = c->redundant;
2949       }
2950       psubcomm = redund->psubcomm;
2951     }
2952     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2953       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2954       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2955         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2956         if (subsize == 1) {
2957           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2958           c->redundant->psubcomm = psubcomm;
2959         } else {
2960           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2961           c->redundant->psubcomm = psubcomm;
2962         }
2963       }
2964       PetscFunctionReturn(0);
2965     }
2966   }
2967 
2968   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2969   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2970   if (reuse == MAT_INITIAL_MATRIX) {
2971     /* create a local sequential matrix matseq[0] */
2972     mloc_sub = PETSC_DECIDE;
2973     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2974     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2975     rstart = rend - mloc_sub;
2976     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2977     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2978   } else { /* reuse == MAT_REUSE_MATRIX */
2979     if (subsize == 1) {
2980       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2981       redund = c->redundant;
2982     } else {
2983       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2984       redund = c->redundant;
2985     }
2986 
2987     isrow  = redund->isrow;
2988     iscol  = redund->iscol;
2989     matseq = redund->matseq;
2990   }
2991   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2992   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2993 
2994   if (reuse == MAT_INITIAL_MATRIX) {
2995     /* create a supporting struct and attach it to *matredundant for reuse */
2996     ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr);
2997     if (subsize == 1) {
2998       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2999       c->redundant = redund;
3000     } else {
3001       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
3002       c->redundant = redund;
3003     }
3004     redund->isrow    = isrow;
3005     redund->iscol    = iscol;
3006     redund->matseq   = matseq;
3007     redund->psubcomm = psubcomm;
3008     redund->Destroy               = (*matredundant)->ops->destroy;
3009     (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
3010   }
3011   PetscFunctionReturn(0);
3012 }
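
     /*
        Usage sketch for the routine above (a minimal, illustrative example;
        MatGetRedundantMatrix() dispatches here for MPIAIJ, and passing
        MPI_COMM_NULL lets PETSc create the subcommunicators itself):

            Mat redA;
            ierr = MatGetRedundantMatrix(A,2,MPI_COMM_NULL,MAT_INITIAL_MATRIX,&redA);CHKERRQ(ierr);
            ierr = MatGetRedundantMatrix(A,2,MPI_COMM_NULL,MAT_REUSE_MATRIX,&redA);CHKERRQ(ierr);
            ierr = MatDestroy(&redA);CHKERRQ(ierr);

        Here A is a placeholder for an assembled MPIAIJ matrix; each of the
        two subcommunicators receives a complete copy of A.
     */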
3013 
3014 #undef __FUNCT__
3015 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
3016 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3017 {
3018   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3019   PetscErrorCode ierr;
3020   PetscInt       i,*idxb = 0;
3021   PetscScalar    *va,*vb;
3022   Vec            vtmp;
3023 
3024   PetscFunctionBegin;
3025   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
3026   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3027   if (idx) {
3028     for (i=0; i<A->rmap->n; i++) {
3029       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3030     }
3031   }
3032 
3033   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3034   if (idx) {
3035     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3036   }
3037   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3038   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3039 
3040   for (i=0; i<A->rmap->n; i++) {
3041     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
3042       va[i] = vb[i];
3043       if (idx) idx[i] = a->garray[idxb[i]];
3044     }
3045   }
3046 
3047   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3048   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3049   ierr = PetscFree(idxb);CHKERRQ(ierr);
3050   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3051   PetscFunctionReturn(0);
3052 }
3053 
3054 #undef __FUNCT__
3055 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
3056 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3057 {
3058   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3059   PetscErrorCode ierr;
3060   PetscInt       i,*idxb = 0;
3061   PetscScalar    *va,*vb;
3062   Vec            vtmp;
3063 
3064   PetscFunctionBegin;
3065   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
3066   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3067   if (idx) {
3068     for (i=0; i<A->rmap->n; i++) {
3069       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3070     }
3071   }
3072 
3073   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3074   if (idx) {
3075     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3076   }
3077   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3078   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3079 
3080   for (i=0; i<A->rmap->n; i++) {
3081     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
3082       va[i] = vb[i];
3083       if (idx) idx[i] = a->garray[idxb[i]];
3084     }
3085   }
3086 
3087   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3088   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3089   ierr = PetscFree(idxb);CHKERRQ(ierr);
3090   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3091   PetscFunctionReturn(0);
3092 }
3093 
3094 #undef __FUNCT__
3095 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
3096 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3097 {
3098   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3099   PetscInt       n      = A->rmap->n;
3100   PetscInt       cstart = A->cmap->rstart;
3101   PetscInt       *cmap  = mat->garray;
3102   PetscInt       *diagIdx, *offdiagIdx;
3103   Vec            diagV, offdiagV;
3104   PetscScalar    *a, *diagA, *offdiagA;
3105   PetscInt       r;
3106   PetscErrorCode ierr;
3107 
3108   PetscFunctionBegin;
3109   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3110   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3111   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3112   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3113   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3114   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3115   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3116   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3117   for (r = 0; r < n; ++r) {
3118     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3119       a[r]   = diagA[r];
3120       idx[r] = cstart + diagIdx[r];
3121     } else {
3122       a[r]   = offdiagA[r];
3123       idx[r] = cmap[offdiagIdx[r]];
3124     }
3125   }
3126   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3127   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3128   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3129   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3130   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3131   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3132   PetscFunctionReturn(0);
3133 }
3134 
3135 #undef __FUNCT__
3136 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3137 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3138 {
3139   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3140   PetscInt       n      = A->rmap->n;
3141   PetscInt       cstart = A->cmap->rstart;
3142   PetscInt       *cmap  = mat->garray;
3143   PetscInt       *diagIdx, *offdiagIdx;
3144   Vec            diagV, offdiagV;
3145   PetscScalar    *a, *diagA, *offdiagA;
3146   PetscInt       r;
3147   PetscErrorCode ierr;
3148 
3149   PetscFunctionBegin;
3150   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3151   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3152   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3153   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3154   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3155   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3156   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3157   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3158   for (r = 0; r < n; ++r) {
3159     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3160       a[r]   = diagA[r];
3161       idx[r] = cstart + diagIdx[r];
3162     } else {
3163       a[r]   = offdiagA[r];
3164       idx[r] = cmap[offdiagIdx[r]];
3165     }
3166   }
3167   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3168   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3169   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3170   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3171   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3172   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3173   PetscFunctionReturn(0);
3174 }
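
     /*
        Usage sketch for the row max/min routines above (a minimal,
        illustrative example using the public interface; A is a placeholder
        for an assembled MPIAIJ matrix):

            Vec      vmax;
            PetscInt m,*idx;
            ierr = MatGetVecs(A,NULL,&vmax);CHKERRQ(ierr);
            ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
            ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
            ierr = MatGetRowMaxAbs(A,vmax,idx);CHKERRQ(ierr);
            ierr = PetscFree(idx);CHKERRQ(ierr);
            ierr = VecDestroy(&vmax);CHKERRQ(ierr);

        On return vmax[i] holds the largest |a_ij| of local row i and idx[i]
        the global column where it occurs.
     */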
3175 
3176 #undef __FUNCT__
3177 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3178 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3179 {
3180   PetscErrorCode ierr;
3181   Mat            *dummy;
3182 
3183   PetscFunctionBegin;
3184   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3185   *newmat = *dummy;
3186   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3187   PetscFunctionReturn(0);
3188 }
3189 
3190 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3191 
3192 #undef __FUNCT__
3193 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3194 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3195 {
3196   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3197   PetscErrorCode ierr;
3198 
3199   PetscFunctionBegin;
3200   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3201   PetscFunctionReturn(0);
3202 }
3203 
3204 #undef __FUNCT__
3205 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3206 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3207 {
3208   PetscErrorCode ierr;
3209   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3210 
3211   PetscFunctionBegin;
3212   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3213   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3214   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3215   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3216   PetscFunctionReturn(0);
3217 }
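
     /*
        Usage sketch for the routine above (a minimal, illustrative example;
        A is a placeholder for a preallocated MPIAIJ matrix, which is filled
        with random values):

            PetscRandom rctx;
            ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr);
            ierr = MatSetRandom(A,rctx);CHKERRQ(ierr);
            ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
     */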
3218 
3219 /* -------------------------------------------------------------------*/
3220 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3221                                        MatGetRow_MPIAIJ,
3222                                        MatRestoreRow_MPIAIJ,
3223                                        MatMult_MPIAIJ,
3224                                 /* 4*/ MatMultAdd_MPIAIJ,
3225                                        MatMultTranspose_MPIAIJ,
3226                                        MatMultTransposeAdd_MPIAIJ,
3227 #if defined(PETSC_HAVE_PBGL)
3228                                        MatSolve_MPIAIJ,
3229 #else
3230                                        0,
3231 #endif
3232                                        0,
3233                                        0,
3234                                 /*10*/ 0,
3235                                        0,
3236                                        0,
3237                                        MatSOR_MPIAIJ,
3238                                        MatTranspose_MPIAIJ,
3239                                 /*15*/ MatGetInfo_MPIAIJ,
3240                                        MatEqual_MPIAIJ,
3241                                        MatGetDiagonal_MPIAIJ,
3242                                        MatDiagonalScale_MPIAIJ,
3243                                        MatNorm_MPIAIJ,
3244                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3245                                        MatAssemblyEnd_MPIAIJ,
3246                                        MatSetOption_MPIAIJ,
3247                                        MatZeroEntries_MPIAIJ,
3248                                 /*24*/ MatZeroRows_MPIAIJ,
3249                                        0,
3250 #if defined(PETSC_HAVE_PBGL)
3251                                        0,
3252 #else
3253                                        0,
3254 #endif
3255                                        0,
3256                                        0,
3257                                 /*29*/ MatSetUp_MPIAIJ,
3258 #if defined(PETSC_HAVE_PBGL)
3259                                        0,
3260 #else
3261                                        0,
3262 #endif
3263                                        0,
3264                                        0,
3265                                        0,
3266                                 /*34*/ MatDuplicate_MPIAIJ,
3267                                        0,
3268                                        0,
3269                                        0,
3270                                        0,
3271                                 /*39*/ MatAXPY_MPIAIJ,
3272                                        MatGetSubMatrices_MPIAIJ,
3273                                        MatIncreaseOverlap_MPIAIJ,
3274                                        MatGetValues_MPIAIJ,
3275                                        MatCopy_MPIAIJ,
3276                                 /*44*/ MatGetRowMax_MPIAIJ,
3277                                        MatScale_MPIAIJ,
3278                                        0,
3279                                        0,
3280                                        MatZeroRowsColumns_MPIAIJ,
3281                                 /*49*/ MatSetRandom_MPIAIJ,
3282                                        0,
3283                                        0,
3284                                        0,
3285                                        0,
3286                                 /*54*/ MatFDColoringCreate_MPIAIJ,
3287                                        0,
3288                                        MatSetUnfactored_MPIAIJ,
3289                                        MatPermute_MPIAIJ,
3290                                        0,
3291                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3292                                        MatDestroy_MPIAIJ,
3293                                        MatView_MPIAIJ,
3294                                        0,
3295                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3296                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3297                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3298                                        0,
3299                                        0,
3300                                        0,
3301                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3302                                        MatGetRowMinAbs_MPIAIJ,
3303                                        0,
3304                                        MatSetColoring_MPIAIJ,
3305                                        0,
3306                                        MatSetValuesAdifor_MPIAIJ,
3307                                 /*75*/ MatFDColoringApply_AIJ,
3308                                        0,
3309                                        0,
3310                                        0,
3311                                        MatFindZeroDiagonals_MPIAIJ,
3312                                 /*80*/ 0,
3313                                        0,
3314                                        0,
3315                                 /*83*/ MatLoad_MPIAIJ,
3316                                        0,
3317                                        0,
3318                                        0,
3319                                        0,
3320                                        0,
3321                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3322                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3323                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3324                                        MatPtAP_MPIAIJ_MPIAIJ,
3325                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3326                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3327                                        0,
3328                                        0,
3329                                        0,
3330                                        0,
3331                                 /*99*/ 0,
3332                                        0,
3333                                        0,
3334                                        MatConjugate_MPIAIJ,
3335                                        0,
3336                                 /*104*/MatSetValuesRow_MPIAIJ,
3337                                        MatRealPart_MPIAIJ,
3338                                        MatImaginaryPart_MPIAIJ,
3339                                        0,
3340                                        0,
3341                                 /*109*/0,
3342                                        MatGetRedundantMatrix_MPIAIJ,
3343                                        MatGetRowMin_MPIAIJ,
3344                                        0,
3345                                        0,
3346                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3347                                        0,
3348                                        0,
3349                                        0,
3350                                        0,
3351                                 /*119*/0,
3352                                        0,
3353                                        0,
3354                                        0,
3355                                        MatGetMultiProcBlock_MPIAIJ,
3356                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3357                                        MatGetColumnNorms_MPIAIJ,
3358                                        MatInvertBlockDiagonal_MPIAIJ,
3359                                        0,
3360                                        MatGetSubMatricesParallel_MPIAIJ,
3361                                 /*129*/0,
3362                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3363                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3364                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3365                                        0,
3366                                 /*134*/0,
3367                                        0,
3368                                        0,
3369                                        0,
3370                                        0,
3371                                 /*139*/0,
3372                                        0,
3373                                        0
3374 };
3375 
3376 /* ----------------------------------------------------------------------------------------*/
3377 
3378 #undef __FUNCT__
3379 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3380 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3381 {
3382   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3383   PetscErrorCode ierr;
3384 
3385   PetscFunctionBegin;
3386   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3387   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3388   PetscFunctionReturn(0);
3389 }
3390 
3391 #undef __FUNCT__
3392 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3393 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3394 {
3395   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3396   PetscErrorCode ierr;
3397 
3398   PetscFunctionBegin;
3399   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3400   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3401   PetscFunctionReturn(0);
3402 }
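
     /*
        The pair MatStoreValues()/MatRetrieveValues() is typically used to
        save and restore the numerical values of a matrix whose nonzero
        pattern is reused, e.g. around an in-place factorization (a minimal
        sketch; mat is assumed to be assembled, and new nonzero locations
        must be disabled first):

            ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
            ierr = MatStoreValues(mat);CHKERRQ(ierr);
            .... values of mat are overwritten here ....
            ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
     */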
3403 
3404 #undef __FUNCT__
3405 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3406 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3407 {
3408   Mat_MPIAIJ     *b;
3409   PetscErrorCode ierr;
3410 
3411   PetscFunctionBegin;
3412   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3413   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3414   b = (Mat_MPIAIJ*)B->data;
3415 
3416   if (!B->preallocated) {
3417     /* Explicitly create 2 MATSEQAIJ matrices. */
3418     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3419     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3420     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3421     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3422     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3423     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3424     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3425     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3426     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3427     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3428   }
3429 
3430   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3431   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3432   B->preallocated = PETSC_TRUE;
3433   PetscFunctionReturn(0);
3434 }
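
     /*
        Usage sketch for the preallocation interface above (a minimal,
        illustrative example; the bounds 5 and 2 are placeholders for the
        maximum number of nonzeros per row in the diagonal and off-diagonal
        blocks):

            ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
            ierr = MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
            ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
            ierr = MatMPIAIJSetPreallocation(B,5,NULL,2,NULL);CHKERRQ(ierr);
     */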
3435 
3436 #undef __FUNCT__
3437 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3438 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3439 {
3440   Mat            mat;
3441   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3442   PetscErrorCode ierr;
3443 
3444   PetscFunctionBegin;
3445   *newmat = 0;
3446   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3447   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3448   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3449   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3450   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3451   a       = (Mat_MPIAIJ*)mat->data;
3452 
3453   mat->factortype   = matin->factortype;
3454   mat->rmap->bs     = matin->rmap->bs;
3455   mat->cmap->bs     = matin->cmap->bs;
3456   mat->assembled    = PETSC_TRUE;
3457   mat->insertmode   = NOT_SET_VALUES;
3458   mat->preallocated = PETSC_TRUE;
3459 
3460   a->size         = oldmat->size;
3461   a->rank         = oldmat->rank;
3462   a->donotstash   = oldmat->donotstash;
3463   a->roworiented  = oldmat->roworiented;
3464   a->rowindices   = 0;
3465   a->rowvalues    = 0;
3466   a->getrowactive = PETSC_FALSE;
3467 
3468   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3469   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3470 
3471   if (oldmat->colmap) {
3472 #if defined(PETSC_USE_CTABLE)
3473     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3474 #else
3475     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3476     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3477     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3478 #endif
3479   } else a->colmap = 0;
3480   if (oldmat->garray) {
3481     PetscInt len;
3482     len  = oldmat->B->cmap->n;
3483     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3484     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3485     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3486   } else a->garray = 0;
3487 
3488   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3489   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3490   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3491   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3492   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3493   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3494   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3495   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3496   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3497   *newmat = mat;
3498   PetscFunctionReturn(0);
3499 }
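
     /*
        Usage sketch (a minimal, illustrative example; MatDuplicate()
        dispatches here for MPIAIJ matrices):

            Mat B;
            ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
            ierr = MatDestroy(&B);CHKERRQ(ierr);
     */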
3500 
3501 
3502 
3503 #undef __FUNCT__
3504 #define __FUNCT__ "MatLoad_MPIAIJ"
3505 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3506 {
3507   PetscScalar    *vals,*svals;
3508   MPI_Comm       comm;
3509   PetscErrorCode ierr;
3510   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3511   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3512   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3513   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3514   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3515   int            fd;
3516   PetscInt       bs = 1;
3517 
3518   PetscFunctionBegin;
3519   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3520   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3521   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3522   if (!rank) {
3523     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3524     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3525     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3526   }
3527 
3528   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3529   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3530   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3531 
3532   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3533 
3534   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3535   M    = header[1]; N = header[2];
3536   /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
3537   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3538   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3539 
3540   /* If global sizes are set, check if they are consistent with that given in the file */
3541   if (sizesset) {
3542     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3543   }
3544   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3545   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3546 
3547   /* determine ownership of all (block) rows */
3548   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3549   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3550   else m = newMat->rmap->n; /* Set by user */
3551 
3552   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3553   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3554 
3555   /* the first process needs enough room for the process with the most rows */
3556   if (!rank) {
3557     mmax = rowners[1];
3558     for (i=2; i<=size; i++) {
3559       mmax = PetscMax(mmax, rowners[i]);
3560     }
3561   } else mmax = -1;             /* unused, but compilers complain */
3562 
3563   rowners[0] = 0;
3564   for (i=2; i<=size; i++) {
3565     rowners[i] += rowners[i-1];
3566   }
3567   rstart = rowners[rank];
3568   rend   = rowners[rank+1];
3569 
3570   /* distribute row lengths to all processors */
3571   ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
3572   if (!rank) {
3573     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3574     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3575     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3576     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3577     for (j=0; j<m; j++) {
3578       procsnz[0] += ourlens[j];
3579     }
3580     for (i=1; i<size; i++) {
3581       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3582       /* calculate the number of nonzeros on each processor */
3583       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3584         procsnz[i] += rowlengths[j];
3585       }
3586       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3587     }
3588     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3589   } else {
3590     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3591   }
3592 
3593   if (!rank) {
3594     /* determine max buffer needed and allocate it */
3595     maxnz = 0;
3596     for (i=0; i<size; i++) {
3597       maxnz = PetscMax(maxnz,procsnz[i]);
3598     }
3599     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3600 
3601     /* read in my part of the matrix column indices  */
3602     nz   = procsnz[0];
3603     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3604     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3605 
3606     /* read in everyone else's and ship off */
3607     for (i=1; i<size; i++) {
3608       nz   = procsnz[i];
3609       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3610       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3611     }
3612     ierr = PetscFree(cols);CHKERRQ(ierr);
3613   } else {
3614     /* determine buffer space needed for message */
3615     nz = 0;
3616     for (i=0; i<m; i++) {
3617       nz += ourlens[i];
3618     }
3619     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3620 
3621     /* receive message of column indices */
3622     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3623   }
3624 
3625   /* determine column ownership if matrix is not square */
3626   if (N != M) {
3627     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3628     else n = newMat->cmap->n;
3629     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3630     cstart = cend - n;
3631   } else {
3632     cstart = rstart;
3633     cend   = rend;
3634     n      = cend - cstart;
3635   }
3636 
3637   /* loop over local rows, determining number of off-diagonal entries */
3638   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3639   jj   = 0;
3640   for (i=0; i<m; i++) {
3641     for (j=0; j<ourlens[i]; j++) {
3642       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3643       jj++;
3644     }
3645   }
3646 
3647   for (i=0; i<m; i++) {
3648     ourlens[i] -= offlens[i];
3649   }
3650   if (!sizesset) {
3651     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3652   }
3653 
3654   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3655 
3656   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3657 
3658   for (i=0; i<m; i++) {
3659     ourlens[i] += offlens[i];
3660   }
3661 
3662   if (!rank) {
3663     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3664 
3665     /* read in my part of the matrix numerical values  */
3666     nz   = procsnz[0];
3667     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3668 
3669     /* insert into matrix */
3670     jj      = rstart;
3671     smycols = mycols;
3672     svals   = vals;
3673     for (i=0; i<m; i++) {
3674       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3675       smycols += ourlens[i];
3676       svals   += ourlens[i];
3677       jj++;
3678     }
3679 
3680     /* read in the other processors' parts and ship them out */
3681     for (i=1; i<size; i++) {
3682       nz   = procsnz[i];
3683       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3684       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3685     }
3686     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3687   } else {
3688     /* receive numeric values */
3689     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3690 
3691     /* receive message of values */
3692     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3693 
3694     /* insert into matrix */
3695     jj      = rstart;
3696     smycols = mycols;
3697     svals   = vals;
3698     for (i=0; i<m; i++) {
3699       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3700       smycols += ourlens[i];
3701       svals   += ourlens[i];
3702       jj++;
3703     }
3704   }
3705   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3706   ierr = PetscFree(vals);CHKERRQ(ierr);
3707   ierr = PetscFree(mycols);CHKERRQ(ierr);
3708   ierr = PetscFree(rowners);CHKERRQ(ierr);
3709   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3710   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3711   PetscFunctionReturn(0);
3712 }
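
     /*
        Usage sketch for loading an MPIAIJ matrix with the routine above (a
        minimal, illustrative example; "matrix.dat" is a placeholder name for
        a file written earlier with MatView() on a binary viewer):

            Mat         A;
            PetscViewer viewer;
            ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
            ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
            ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
            ierr = MatLoad(A,viewer);CHKERRQ(ierr);
            ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
     */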
3713 
3714 #undef __FUNCT__
3715 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3716 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3717 {
3718   PetscErrorCode ierr;
3719   IS             iscol_local;
3720   PetscInt       csize;
3721 
3722   PetscFunctionBegin;
3723   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3724   if (call == MAT_REUSE_MATRIX) {
3725     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3726     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3727   } else {
3728     PetscInt cbs;
3729     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3730     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3731     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3732   }
3733   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3734   if (call == MAT_INITIAL_MATRIX) {
3735     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3736     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3737   }
3738   PetscFunctionReturn(0);
3739 }
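
     /*
        Usage sketch for the routine above (a minimal, illustrative example;
        MatGetSubMatrix() dispatches here for MPIAIJ, and nloc and first are
        placeholder local sizes/offsets describing each process's share of
        the requested rows and columns):

            IS  isrow,iscol;
            Mat sub;
            ierr = ISCreateStride(PETSC_COMM_WORLD,nloc,first,1,&isrow);CHKERRQ(ierr);
            ierr = ISCreateStride(PETSC_COMM_WORLD,nloc,first,1,&iscol);CHKERRQ(ierr);
            ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
            ierr = ISDestroy(&isrow);CHKERRQ(ierr);
            ierr = ISDestroy(&iscol);CHKERRQ(ierr);
     */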
3740 
3741 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3742 #undef __FUNCT__
3743 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3744 /*
3745     Not great since it makes two copies of the submatrix: first a SeqAIJ
3746   locally, then the end result by concatenating the local matrices.
3747   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3748 
3749   Note: This requires a sequential iscol with all indices.
3750 */
3751 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3752 {
3753   PetscErrorCode ierr;
3754   PetscMPIInt    rank,size;
3755   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3756   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3757   PetscBool      allcolumns, colflag;
3758   Mat            M,Mreuse;
3759   MatScalar      *vwork,*aa;
3760   MPI_Comm       comm;
3761   Mat_SeqAIJ     *aij;
3762 
3763   PetscFunctionBegin;
3764   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3765   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3766   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3767 
3768   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3769   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3770   if (colflag && ncol == mat->cmap->N) {
3771     allcolumns = PETSC_TRUE;
3772   } else {
3773     allcolumns = PETSC_FALSE;
3774   }
3775   if (call ==  MAT_REUSE_MATRIX) {
3776     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3777     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3778     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3779   } else {
3780     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3781   }
3782 
3783   /*
3784       m - number of local rows
3785       n - number of columns (same on all processors)
3786       rstart - first row in new global matrix generated
3787   */
3788   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3789   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3790   if (call == MAT_INITIAL_MATRIX) {
3791     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3792     ii  = aij->i;
3793     jj  = aij->j;
3794 
3795     /*
3796         Determine the number of non-zeros in the diagonal and off-diagonal
3797         portions of the matrix in order to do correct preallocation
3798     */
3799 
3800     /* first get start and end of "diagonal" columns */
3801     if (csize == PETSC_DECIDE) {
3802       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3803       if (mglobal == n) { /* square matrix */
3804         nlocal = m;
3805       } else {
3806         nlocal = n/size + ((n % size) > rank);
3807       }
3808     } else {
3809       nlocal = csize;
3810     }
3811     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3812     rstart = rend - nlocal;
3813     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3814 
3815     /* next, compute all the lengths */
3816     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3817     olens = dlens + m;
3818     for (i=0; i<m; i++) {
3819       jend = ii[i+1] - ii[i];
3820       olen = 0;
3821       dlen = 0;
3822       for (j=0; j<jend; j++) {
3823         if (*jj < rstart || *jj >= rend) olen++;
3824         else dlen++;
3825         jj++;
3826       }
3827       olens[i] = olen;
3828       dlens[i] = dlen;
3829     }
3830     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3831     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3832     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3833     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3834     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3835     ierr = PetscFree(dlens);CHKERRQ(ierr);
3836   } else {
3837     PetscInt ml,nl;
3838 
3839     M    = *newmat;
3840     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3841     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3842     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3843     /*
3844          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3845        rather than the slower MatSetValues().
3846     */
3847     M->was_assembled = PETSC_TRUE;
3848     M->assembled     = PETSC_FALSE;
3849   }
3850   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3851   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3852   ii   = aij->i;
3853   jj   = aij->j;
3854   aa   = aij->a;
3855   for (i=0; i<m; i++) {
3856     row   = rstart + i;
3857     nz    = ii[i+1] - ii[i];
3858     cwork = jj;     jj += nz;
3859     vwork = aa;     aa += nz;
3860     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3861   }
3862 
3863   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3864   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3865   *newmat = M;
3866 
3867   /* save submatrix used in processor for next request */
3868   if (call ==  MAT_INITIAL_MATRIX) {
3869     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3870     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3871   }
3872   PetscFunctionReturn(0);
3873 }
3874 
3875 #undef __FUNCT__
3876 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3877 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3878 {
3879   PetscInt       m,cstart, cend,j,nnz,i,d;
3880   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3881   const PetscInt *JJ;
3882   PetscScalar    *values;
3883   PetscErrorCode ierr;
3884 
3885   PetscFunctionBegin;
3886   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3887 
3888   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3889   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3890   m      = B->rmap->n;
3891   cstart = B->cmap->rstart;
3892   cend   = B->cmap->rend;
3893   rstart = B->rmap->rstart;
3894 
3895   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3896 
3897 #if defined(PETSC_USE_DEBUG)
3898   for (i=0; i<m; i++) {
3899     nnz = Ii[i+1]- Ii[i];
3900     JJ  = J + Ii[i];
3901     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3902     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3903     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3904   }
3905 #endif
3906 
3907   for (i=0; i<m; i++) {
3908     nnz     = Ii[i+1]- Ii[i];
3909     JJ      = J + Ii[i];
3910     nnz_max = PetscMax(nnz_max,nnz);
3911     d       = 0;
3912     for (j=0; j<nnz; j++) {
3913       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3914     }
3915     d_nnz[i] = d;
3916     o_nnz[i] = nnz - d;
3917   }
3918   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3919   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3920 
3921   if (v) values = (PetscScalar*)v;
3922   else {
3923     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3924     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3925   }
3926 
3927   for (i=0; i<m; i++) {
3928     ii   = i + rstart;
3929     nnz  = Ii[i+1]- Ii[i];
3930     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3931   }
3932   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3933   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3934 
3935   if (!v) {
3936     ierr = PetscFree(values);CHKERRQ(ierr);
3937   }
3938   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3939   PetscFunctionReturn(0);
3940 }
3941 
3942 #undef __FUNCT__
3943 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3944 /*@
3945    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3946    (the default parallel PETSc format).
3947 
3948    Collective on MPI_Comm
3949 
3950    Input Parameters:
3951 +  B - the matrix
3952 .  i - the indices into j for the start of each local row (starts with zero)
3953 .  j - the column indices for each local row (starts with zero)
3954 -  v - optional values in the matrix
3955 
3956    Level: developer
3957 
3958    Notes:
3959        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3960      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3961      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3962 
3963        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3964 
3965        The format used for the sparse matrix input is equivalent to a
3966     row-major ordering, i.e., for the following matrix the expected input data is
3967     as shown:
3968 
3969         1 0 0
3970         2 0 3     P0
3971        -------
3972         4 5 6     P1
3973 
3974      Process0 [P0]: rows_owned=[0,1]
3975         i =  {0,1,3}  [size = nrow+1  = 2+1]
3976         j =  {0,0,2}  [size = nz = 3]
3977         v =  {1,2,3}  [size = nz = 3]
3978 
3979      Process1 [P1]: rows_owned=[2]
3980         i =  {0,3}    [size = nrow+1  = 1+1]
3981         j =  {0,1,2}  [size = nz = 3]
3982         v =  {4,5,6}  [size = nz = 3]
3983 
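     A sketch of the call Process0 above might make, assuming B has already been created
     with local size 2 x PETSC_DECIDE and type MATMPIAIJ (the variable names here are
     illustrative only):
.vb
     PetscInt    i[] = {0,1,3};    /* row offsets for the 2 local rows */
     PetscInt    j[] = {0,0,2};    /* global column indices */
     PetscScalar v[] = {1,2,3};    /* values, matching j[] entry for entry */
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
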
3984 .keywords: matrix, aij, compressed row, sparse, parallel
3985 
3986 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3987           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3988 @*/
3989 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3990 {
3991   PetscErrorCode ierr;
3992 
3993   PetscFunctionBegin;
3994   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3995   PetscFunctionReturn(0);
3996 }
3997 
3998 #undef __FUNCT__
3999 #define __FUNCT__ "MatMPIAIJSetPreallocation"
4000 /*@C
4001    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4002    (the default parallel PETSc format).  For good matrix assembly performance
4003    the user should preallocate the matrix storage by setting the parameters
4004    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4005    performance can be increased by more than a factor of 50.
4006 
4007    Collective on MPI_Comm
4008 
4009    Input Parameters:
4010 +  A - the matrix
4011 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4012            (same value is used for all local rows)
4013 .  d_nnz - array containing the number of nonzeros in the various rows of the
4014            DIAGONAL portion of the local submatrix (possibly different for each row)
4015            or NULL, if d_nz is used to specify the nonzero structure.
4016            The size of this array is equal to the number of local rows, i.e 'm'.
4017            For matrices that will be factored, you must leave room for (and set)
4018            the diagonal entry even if it is zero.
4019 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4020            submatrix (same value is used for all local rows).
4021 -  o_nnz - array containing the number of nonzeros in the various rows of the
4022            OFF-DIAGONAL portion of the local submatrix (possibly different for
4023            each row) or NULL, if o_nz is used to specify the nonzero
4024            structure. The size of this array is equal to the number
4025            of local rows, i.e 'm'.
4026 
4027    If the *_nnz parameter is given then the *_nz parameter is ignored
4028 
4029    The AIJ format (also called the Yale sparse matrix format or
4030    compressed row storage (CSR)) is fully compatible with standard Fortran 77
4031    storage.  The stored row and column indices begin with zero.
4032    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
4033 
4034    The parallel matrix is partitioned such that the first m0 rows belong to
4035    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4036    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
4037 
4038    The DIAGONAL portion of the local submatrix of a processor can be defined
4039    as the submatrix which is obtained by extracting the part corresponding to
4040    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4041    first row that belongs to the processor, r2 is the last row belonging to
4042    this processor, and c1-c2 is the range of indices of the local part of a
4043    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4044    common case of a square matrix, the row and column ranges are the same and
4045    the DIAGONAL part is also square. The remaining portion of the local
4046    submatrix (m x N) constitutes the OFF-DIAGONAL portion.
4047 
4048    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4049 
4050    You can call MatGetInfo() to get information on how effective the preallocation was,
4051    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4052    You can also run with the option -info and look for messages with the string
4053    malloc in them to see if additional memory allocation was needed.
4054 
4055    Example usage:
4056 
4057    Consider the following 8x8 matrix with 34 non-zero values, that is
4058    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4059    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4060    as follows:
4061 
4062 .vb
4063             1  2  0  |  0  3  0  |  0  4
4064     Proc0   0  5  6  |  7  0  0  |  8  0
4065             9  0 10  | 11  0  0  | 12  0
4066     -------------------------------------
4067            13  0 14  | 15 16 17  |  0  0
4068     Proc1   0 18  0  | 19 20 21  |  0  0
4069             0  0  0  | 22 23  0  | 24  0
4070     -------------------------------------
4071     Proc2  25 26 27  |  0  0 28  | 29  0
4072            30  0  0  | 31 32 33  |  0 34
4073 .ve
4074 
4075    This can be represented as a collection of submatrices as:
4076 
4077 .vb
4078       A B C
4079       D E F
4080       G H I
4081 .ve
4082 
4083    Here the submatrices A,B,C are owned by proc0, D,E,F are
4084    owned by proc1, and G,H,I are owned by proc2.
4085 
4086    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4087    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4088    The 'M','N' parameters are 8,8, and have the same values on all procs.
4089 
4090    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4091    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4092    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4093    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4094    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
4095    matrix and [DF] as another SeqAIJ matrix.
4096 
4097    When d_nz, o_nz parameters are specified, d_nz storage elements are
4098    allocated for every row of the local diagonal submatrix, and o_nz
4099    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4100    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4101    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4102    In this case, the values of d_nz,o_nz are:
4103 .vb
4104      proc0 : d_nz = 2, o_nz = 2
4105      proc1 : d_nz = 3, o_nz = 2
4106      proc2 : d_nz = 1, o_nz = 4
4107 .ve
4108    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4109    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4110    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4111    34 values.
4112 
4113    When d_nnz, o_nnz parameters are specified, the storage is specified
4114    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4115    In the above case the values for d_nnz,o_nnz are:
4116 .vb
4117      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4118      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4119      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4120 .ve
4121    Here the space allocated is the sum of all the above values, i.e., 34, and
4122    hence the preallocation is perfect.
4123 
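   For instance, proc0 in the example above could preallocate with the following
   sketch (matrix creation and error checking omitted):
.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
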
4124    Level: intermediate
4125 
4126 .keywords: matrix, aij, compressed row, sparse, parallel
4127 
4128 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4129           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4130 @*/
4131 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4132 {
4133   PetscErrorCode ierr;
4134 
4135   PetscFunctionBegin;
4136   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4137   PetscValidType(B,1);
4138   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4139   PetscFunctionReturn(0);
4140 }
4141 
4142 #undef __FUNCT__
4143 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4144 /*@
4145      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4146          rows in standard CSR format.
4147 
4148    Collective on MPI_Comm
4149 
4150    Input Parameters:
4151 +  comm - MPI communicator
4152 .  m - number of local rows (Cannot be PETSC_DECIDE)
4153 .  n - This value should be the same as the local size used in creating the
4154        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4155        calculated if N is given). For square matrices n is almost always m.
4156 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4157 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4158 .   i - row indices
4159 .   j - column indices
4160 -   a - matrix values
4161 
4162    Output Parameter:
4163 .   mat - the matrix
4164 
4165    Level: intermediate
4166 
4167    Notes:
4168        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4169      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4170      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4171 
4172        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4173 
4174        The format used for the sparse matrix input is equivalent to a
4175     row-major ordering, i.e., for the following matrix the expected input data is
4176     as shown:
4177 
4178         1 0 0
4179         2 0 3     P0
4180        -------
4181         4 5 6     P1
4182 
4183      Process0 [P0]: rows_owned=[0,1]
4184         i =  {0,1,3}  [size = nrow+1  = 2+1]
4185         j =  {0,0,2}  [size = nz = 3]
4186         v =  {1,2,3}  [size = nz = 3]
4187 
4188      Process1 [P1]: rows_owned=[2]
4189         i =  {0,3}    [size = nrow+1  = 1+1]
4190         j =  {0,1,2}  [size = nz = 3]
4191         v =  {4,5,6}  [size = nz = 3]
4192 
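     For example, Process0 above could create its part of the 3 x 3 matrix with the
     following sketch (the other process makes the matching collective call with its
     own arrays; names are illustrative only):
.vb
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar a[] = {1,2,3};
     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&mat);CHKERRQ(ierr);
.ve
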
4193 .keywords: matrix, aij, compressed row, sparse, parallel
4194 
4195 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4196           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4197 @*/
4198 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4199 {
4200   PetscErrorCode ierr;
4201 
4202   PetscFunctionBegin;
4203   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4204   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4205   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4206   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4207   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4208   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4209   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4210   PetscFunctionReturn(0);
4211 }
4212 
4213 #undef __FUNCT__
4214 #define __FUNCT__ "MatCreateAIJ"
4215 /*@C
4216    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4217    (the default parallel PETSc format).  For good matrix assembly performance
4218    the user should preallocate the matrix storage by setting the parameters
4219    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4220    performance can be increased by more than a factor of 50.
4221 
4222    Collective on MPI_Comm
4223 
4224    Input Parameters:
4225 +  comm - MPI communicator
4226 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4227            This value should be the same as the local size used in creating the
4228            y vector for the matrix-vector product y = Ax.
4229 .  n - This value should be the same as the local size used in creating the
4230        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4231        calculated if N is given). For square matrices n is almost always m.
4232 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4233 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4234 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4235            (same value is used for all local rows)
4236 .  d_nnz - array containing the number of nonzeros in the various rows of the
4237            DIAGONAL portion of the local submatrix (possibly different for each row)
4238            or NULL, if d_nz is used to specify the nonzero structure.
4239            The size of this array is equal to the number of local rows, i.e 'm'.
4240 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4241            submatrix (same value is used for all local rows).
4242 -  o_nnz - array containing the number of nonzeros in the various rows of the
4243            OFF-DIAGONAL portion of the local submatrix (possibly different for
4244            each row) or NULL, if o_nz is used to specify the nonzero
4245            structure. The size of this array is equal to the number
4246            of local rows, i.e 'm'.
4247 
4248    Output Parameter:
4249 .  A - the matrix
4250 
4251    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4252    MatXXXXSetPreallocation() paradigm instead of calling this routine directly.
4253    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4254 
4255    Notes:
4256    If the *_nnz parameter is given then the *_nz parameter is ignored
4257 
4258    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4259    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4260    storage requirements for this matrix.
4261 
4262    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4263    processor then it must be used on all processors that share the object for
4264    that argument.
4265 
4266    The user MUST specify either the local or global matrix dimensions
4267    (possibly both).
4268 
4269    The parallel matrix is partitioned across processors such that the
4270    first m0 rows belong to process 0, the next m1 rows belong to
4271    process 1, the next m2 rows belong to process 2, etc., where
4272    m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
4273    values corresponding to an [m x N] submatrix.
4274 
4275    The columns are logically partitioned with the n0 columns belonging
4276    to the 0th partition, the next n1 columns belonging to the next
4277    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
4278 
4279    The DIAGONAL portion of the local submatrix on any given processor
4280    is the submatrix corresponding to the rows m and columns n owned
4281    by the given processor, i.e., the diagonal matrix on
4282    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4283    etc. The remaining portion of the local submatrix [m x (N-n)]
4284    constitutes the OFF-DIAGONAL portion. The example below better
4285    illustrates this concept.
4286 
4287    For a square global matrix we define each processor's diagonal portion
4288    to be its local rows and the corresponding columns (a square submatrix);
4289    each processor's off-diagonal portion encompasses the remainder of the
4290    local matrix (a rectangular submatrix).
4291 
4292    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4293 
4294    When calling this routine with a single process communicator, a matrix of
4295    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4296    type of communicator, use the construction mechanism:
4297      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4298 
4299    By default, this format uses inodes (identical nodes) when possible.
4300    We search for consecutive rows with the same nonzero structure, thereby
4301    reusing matrix information to achieve increased efficiency.
4302 
4303    Options Database Keys:
4304 +  -mat_no_inode  - Do not use inodes
4305 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4306 -  -mat_aij_oneindex - Internally use indexing starting at 1
4307         rather than 0.  Note that when calling MatSetValues(),
4308         the user still MUST index entries starting at 0!
4309 
4310 
4311    Example usage:
4312 
4313    Consider the following 8x8 matrix with 34 non-zero values, that is
4314    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4315    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4316    as follows:
4317 
4318 .vb
4319             1  2  0  |  0  3  0  |  0  4
4320     Proc0   0  5  6  |  7  0  0  |  8  0
4321             9  0 10  | 11  0  0  | 12  0
4322     -------------------------------------
4323            13  0 14  | 15 16 17  |  0  0
4324     Proc1   0 18  0  | 19 20 21  |  0  0
4325             0  0  0  | 22 23  0  | 24  0
4326     -------------------------------------
4327     Proc2  25 26 27  |  0  0 28  | 29  0
4328            30  0  0  | 31 32 33  |  0 34
4329 .ve
4330 
4331    This can be represented as a collection of submatrices as:
4332 
4333 .vb
4334       A B C
4335       D E F
4336       G H I
4337 .ve
4338 
4339    Here the submatrices A,B,C are owned by proc0, D,E,F are
4340    owned by proc1, and G,H,I are owned by proc2.
4341 
4342    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4343    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4344    The 'M','N' parameters are 8,8, and have the same values on all procs.
4345 
4346    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4347    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4348    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4349    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4350    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
4351    matrix and [DF] as another SeqAIJ matrix.
4352 
4353    When d_nz, o_nz parameters are specified, d_nz storage elements are
4354    allocated for every row of the local diagonal submatrix, and o_nz
4355    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4356    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4357    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4358    In this case, the values of d_nz,o_nz are:
4359 .vb
4360      proc0 : d_nz = 2, o_nz = 2
4361      proc1 : d_nz = 3, o_nz = 2
4362      proc2 : d_nz = 1, o_nz = 4
4363 .ve
4364    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4365    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4366    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4367    34 values.
4368 
4369    When d_nnz, o_nnz parameters are specified, the storage is specified
4370    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4371    In the above case the values for d_nnz,o_nnz are:
4372 .vb
4373      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4374      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4375      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4376 .ve
4377    Here the space allocated is the sum of all the above values, i.e., 34, and
4378    hence the preallocation is perfect.
4379 
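   For instance, using the d_nnz/o_nnz values above, proc1 would call (a sketch; the
   other ranks make the matching collective call with their own values):
.vb
     PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
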
4380    Level: intermediate
4381 
4382 .keywords: matrix, aij, compressed row, sparse, parallel
4383 
4384 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4385           MPIAIJ, MatCreateMPIAIJWithArrays()
4386 @*/
4387 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4388 {
4389   PetscErrorCode ierr;
4390   PetscMPIInt    size;
4391 
4392   PetscFunctionBegin;
4393   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4394   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4395   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4396   if (size > 1) {
4397     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4398     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4399   } else {
4400     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4401     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4402   }
4403   PetscFunctionReturn(0);
4404 }
4405 
4406 #undef __FUNCT__
4407 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
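/*
   MatMPIAIJGetSeqAIJ - returns the diagonal block Ad and the off-diagonal block Ao of an
   MPIAIJ matrix as SeqAIJ matrices, along with colmap, which maps the compressed local
   column numbering of Ao to global column indices. The returned objects are the internal
   ones, not copies, so the caller must not destroy them.
*/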
4408 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4409 {
4410   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4411 
4412   PetscFunctionBegin;
4413   *Ad     = a->A;
4414   *Ao     = a->B;
4415   *colmap = a->garray;
4416   PetscFunctionReturn(0);
4417 }
4418 
4419 #undef __FUNCT__
4420 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4421 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4422 {
4423   PetscErrorCode ierr;
4424   PetscInt       i;
4425   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4426 
4427   PetscFunctionBegin;
4428   if (coloring->ctype == IS_COLORING_GLOBAL) {
4429     ISColoringValue *allcolors,*colors;
4430     ISColoring      ocoloring;
4431 
4432     /* set coloring for diagonal portion */
4433     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4434 
4435     /* set coloring for off-diagonal portion */
4436     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4437     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4438     for (i=0; i<a->B->cmap->n; i++) {
4439       colors[i] = allcolors[a->garray[i]];
4440     }
4441     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4442     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4443     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4444     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4445   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4446     ISColoringValue *colors;
4447     PetscInt        *larray;
4448     ISColoring      ocoloring;
4449 
4450     /* set coloring for diagonal portion */
4451     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4452     for (i=0; i<a->A->cmap->n; i++) {
4453       larray[i] = i + A->cmap->rstart;
4454     }
4455     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4456     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4457     for (i=0; i<a->A->cmap->n; i++) {
4458       colors[i] = coloring->colors[larray[i]];
4459     }
4460     ierr = PetscFree(larray);CHKERRQ(ierr);
4461     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4462     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4463     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4464 
4465     /* set coloring for off-diagonal portion */
4466     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4467     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4468     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4469     for (i=0; i<a->B->cmap->n; i++) {
4470       colors[i] = coloring->colors[larray[i]];
4471     }
4472     ierr = PetscFree(larray);CHKERRQ(ierr);
4473     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4474     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4475     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4476   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4477   PetscFunctionReturn(0);
4478 }
4479 
4480 #undef __FUNCT__
4481 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4482 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4483 {
4484   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4485   PetscErrorCode ierr;
4486 
4487   PetscFunctionBegin;
4488   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4489   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4490   PetscFunctionReturn(0);
4491 }
4492 
4493 #undef __FUNCT__
4494 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4495 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4496 {
4497   PetscErrorCode ierr;
4498   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4499   PetscInt       *indx;
4500 
4501   PetscFunctionBegin;
4502   /* This routine will ONLY return MPIAIJ type matrix */
4503   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4504   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4505   if (n == PETSC_DECIDE) {
4506     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4507   }
4508   /* Check sum(n) = N */
4509   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4510   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
4511 
4512   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4513   rstart -= m;
4514 
4515   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4516   for (i=0; i<m; i++) {
4517     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4518     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4519     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4520   }
4521 
4522   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4523   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4524   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4525   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4526   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4527   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4528   PetscFunctionReturn(0);
4529 }
4530 
4531 #undef __FUNCT__
4532 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4533 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4534 {
4535   PetscErrorCode ierr;
4536   PetscInt       m,N,i,rstart,nnz,Ii;
4537   PetscInt       *indx;
4538   PetscScalar    *values;
4539 
4540   PetscFunctionBegin;
4541   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4542   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4543   for (i=0; i<m; i++) {
4544     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4545     Ii   = i + rstart;
4546     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4547     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4548   }
4549   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4550   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4551   PetscFunctionReturn(0);
4552 }
4553 
4554 #undef __FUNCT__
4555 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4556 /*@
4557       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4558                  matrices from each processor
4559 
4560     Collective on MPI_Comm
4561 
4562    Input Parameters:
4563 +    comm - the communicator the parallel matrix will live on
4564 .    inmat - the input sequential matrix (one per process)
4565 .    n - number of local columns (or PETSC_DECIDE)
4566 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4567 
4568    Output Parameter:
4569 .    outmat - the parallel matrix generated
4570 
4571     Level: advanced
4572 
4573    Notes: The number of columns of the matrix in EACH processor MUST be the same.
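
   Example usage sketch (each rank passes its own sequential matrix Aseq; the names
   are illustrative only):
.vb
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
.ve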
4574 
4575 @*/
4576 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4577 {
4578   PetscErrorCode ierr;
4579   PetscMPIInt    size;
4580 
4581   PetscFunctionBegin;
4582   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4583   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4584   if (size == 1) {
4585     if (scall == MAT_INITIAL_MATRIX) {
4586       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4587     } else {
4588       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4589     }
4590   } else {
4591     if (scall == MAT_INITIAL_MATRIX) {
4592       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4593     }
4594     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4595   }
4596   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4597   PetscFunctionReturn(0);
4598 }
4599 
4600 #undef __FUNCT__
4601 #define __FUNCT__ "MatFileSplit"
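/*
   MatFileSplit - writes each process's local rows of A, as a sequential m x N matrix,
   to its own binary file named <outfile>.<rank>.
*/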
4602 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4603 {
4604   PetscErrorCode    ierr;
4605   PetscMPIInt       rank;
4606   PetscInt          m,N,i,rstart,nnz;
4607   size_t            len;
4608   const PetscInt    *indx;
4609   PetscViewer       out;
4610   char              *name;
4611   Mat               B;
4612   const PetscScalar *values;
4613 
4614   PetscFunctionBegin;
4615   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4616   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4617   /* Should this be the type of the diagonal block of A? */
4618   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4619   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4620   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4621   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4622   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4623   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4624   for (i=0; i<m; i++) {
4625     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4626     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4627     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4628   }
4629   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4630   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4631 
4632   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4633   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4634   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4635   sprintf(name,"%s.%d",outfile,rank);
4636   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4637   ierr = PetscFree(name);CHKERRQ(ierr);
4638   ierr = MatView(B,out);CHKERRQ(ierr);
4639   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4640   ierr = MatDestroy(&B);CHKERRQ(ierr);
4641   PetscFunctionReturn(0);
4642 }
4643 
4644 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4645 #undef __FUNCT__
4646 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4647 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4648 {
4649   PetscErrorCode      ierr;
4650   Mat_Merge_SeqsToMPI *merge;
4651   PetscContainer      container;
4652 
4653   PetscFunctionBegin;
4654   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4655   if (container) {
4656     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4657     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4658     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4659     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4660     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4661     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4662     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4663     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4664     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4665     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4666     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4667     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4668     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4669     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4670     ierr = PetscFree(merge);CHKERRQ(ierr);
4671     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4672   }
4673   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4674   PetscFunctionReturn(0);
4675 }
4676 
4677 #include <../src/mat/utils/freespace.h>
4678 #include <petscbt.h>
4679 
4680 #undef __FUNCT__
4681 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
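/*
   Numeric phase of MatCreateMPIAIJSumSeqAIJ(): fills in the values of mpimat, whose
   nonzero structure was built by MatCreateMPIAIJSumSeqAIJSymbolic(), by summing, for
   each owned row, this process's seqmat values with the values received from the
   other processes.
*/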
4682 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4683 {
4684   PetscErrorCode      ierr;
4685   MPI_Comm            comm;
4686   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4687   PetscMPIInt         size,rank,taga,*len_s;
4688   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4689   PetscInt            proc,m;
4690   PetscInt            **buf_ri,**buf_rj;
4691   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4692   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4693   MPI_Request         *s_waits,*r_waits;
4694   MPI_Status          *status;
4695   MatScalar           *aa=a->a;
4696   MatScalar           **abuf_r,*ba_i;
4697   Mat_Merge_SeqsToMPI *merge;
4698   PetscContainer      container;
4699 
4700   PetscFunctionBegin;
4701   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4702   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4703 
4704   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4705   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4706 
4707   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4708   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4709 
4710   bi     = merge->bi;
4711   bj     = merge->bj;
4712   buf_ri = merge->buf_ri;
4713   buf_rj = merge->buf_rj;
4714 
4715   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4716   owners = merge->rowmap->range;
4717   len_s  = merge->len_s;
4718 
4719   /* send and recv matrix values */
4720   /*-----------------------------*/
4721   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4722   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4723 
4724   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4725   for (proc=0,k=0; proc<size; proc++) {
4726     if (!len_s[proc]) continue;
4727     i    = owners[proc];
4728     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4729     k++;
4730   }
4731 
4732   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4733   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4734   ierr = PetscFree(status);CHKERRQ(ierr);
4735 
4736   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4737   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4738 
4739   /* insert mat values of mpimat */
4740   /*----------------------------*/
4741   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4742   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4743 
4744   for (k=0; k<merge->nrecv; k++) {
4745     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4746     nrows       = *(buf_ri_k[k]);
4747     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4748     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4749   }
4750 
4751   /* set values of ba */
4752   m = merge->rowmap->n;
4753   for (i=0; i<m; i++) {
4754     arow = owners[rank] + i;
4755     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4756     bnzi = bi[i+1] - bi[i];
4757     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4758 
4759     /* add local non-zero vals of this proc's seqmat into ba */
4760     anzi   = ai[arow+1] - ai[arow];
4761     aj     = a->j + ai[arow];
4762     aa     = a->a + ai[arow];
4763     nextaj = 0;
4764     for (j=0; nextaj<anzi; j++) {
4765       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4766         ba_i[j] += aa[nextaj++];
4767       }
4768     }
4769 
4770     /* add received vals into ba */
4771     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4772       /* i-th row */
4773       if (i == *nextrow[k]) {
4774         anzi   = *(nextai[k]+1) - *nextai[k];
4775         aj     = buf_rj[k] + *(nextai[k]);
4776         aa     = abuf_r[k] + *(nextai[k]);
4777         nextaj = 0;
4778         for (j=0; nextaj<anzi; j++) {
4779           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4780             ba_i[j] += aa[nextaj++];
4781           }
4782         }
4783         nextrow[k]++; nextai[k]++;
4784       }
4785     }
4786     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4787   }
4788   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4789   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4790 
4791   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4792   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4793   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4794   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4795   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4796   PetscFunctionReturn(0);
4797 }
4798 
4799 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4800 
4801 #undef __FUNCT__
4802 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
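/*
   Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): each process sends the i/j structure of
   the rows owned by other processes, merges the received column lists with its own rows
   using a sorted linked list, and preallocates the parallel matrix; the values are filled
   in later by MatCreateMPIAIJSumSeqAIJNumeric().
*/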
4803 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4804 {
4805   PetscErrorCode      ierr;
4806   Mat                 B_mpi;
4807   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4808   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4809   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4810   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4811   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4812   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4813   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4814   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4815   MPI_Status          *status;
4816   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4817   PetscBT             lnkbt;
4818   Mat_Merge_SeqsToMPI *merge;
4819   PetscContainer      container;
4820 
4821   PetscFunctionBegin;
4822   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4823 
4824   /* make sure it is a PETSc comm */
4825   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4826   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4827   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4828 
4829   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4830   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4831 
4832   /* determine row ownership */
4833   /*---------------------------------------------------------*/
4834   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4835   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4836   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4837   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4838   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4839   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4840   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4841 
4842   m      = merge->rowmap->n;
4843   owners = merge->rowmap->range;
4844 
4845   /* determine the number of messages to send, their lengths */
4846   /*---------------------------------------------------------*/
4847   len_s = merge->len_s;
4848 
4849   len          = 0; /* length of buf_si[] */
4850   merge->nsend = 0;
4851   for (proc=0; proc<size; proc++) {
4852     len_si[proc] = 0;
4853     if (proc == rank) {
4854       len_s[proc] = 0;
4855     } else {
4856       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4857       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4858     }
4859     if (len_s[proc]) {
4860       merge->nsend++;
4861       nrows = 0;
4862       for (i=owners[proc]; i<owners[proc+1]; i++) {
4863         if (ai[i+1] > ai[i]) nrows++;
4864       }
4865       len_si[proc] = 2*(nrows+1);
4866       len         += len_si[proc];
4867     }
4868   }
4869 
4870   /* determine the number and length of messages to receive for ij-structure */
4871   /*-------------------------------------------------------------------------*/
4872   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4873   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4874 
4875   /* post the Irecv of j-structure */
4876   /*-------------------------------*/
4877   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4878   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4879 
4880   /* post the Isend of j-structure */
4881   /*--------------------------------*/
4882   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4883 
4884   for (proc=0, k=0; proc<size; proc++) {
4885     if (!len_s[proc]) continue;
4886     i    = owners[proc];
4887     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4888     k++;
4889   }
4890 
4891   /* receives and sends of j-structure are complete */
4892   /*------------------------------------------------*/
4893   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4894   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4895 
4896   /* send and recv i-structure */
4897   /*---------------------------*/
4898   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4899   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4900 
4901   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4902   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4903   for (proc=0,k=0; proc<size; proc++) {
4904     if (!len_s[proc]) continue;
4905     /* form outgoing message for i-structure:
4906          buf_si[0]:                 nrows to be sent
4907                [1:nrows]:           row index (global)
4908                [nrows+1:2*nrows+1]: i-structure index
4909     */
4910     /*-------------------------------------------*/
4911     nrows       = len_si[proc]/2 - 1;
4912     buf_si_i    = buf_si + nrows+1;
4913     buf_si[0]   = nrows;
4914     buf_si_i[0] = 0;
4915     nrows       = 0;
4916     for (i=owners[proc]; i<owners[proc+1]; i++) {
4917       anzi = ai[i+1] - ai[i];
4918       if (anzi) {
4919         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4920         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4921         nrows++;
4922       }
4923     }
4924     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4925     k++;
4926     buf_si += len_si[proc];
4927   }
4928 
4929   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4930   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4931 
4932   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4933   for (i=0; i<merge->nrecv; i++) {
4934     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4935   }
4936 
4937   ierr = PetscFree(len_si);CHKERRQ(ierr);
4938   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4939   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4940   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4941   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4942   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4943   ierr = PetscFree(status);CHKERRQ(ierr);
4944 
4945   /* compute a local seq matrix in each processor */
4946   /*----------------------------------------------*/
4947   /* allocate bi array and free space for accumulating nonzero column info */
4948   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4949   bi[0] = 0;
4950 
4951   /* create and initialize a linked list */
4952   nlnk = N+1;
4953   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4954 
4955   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4956   len  = ai[owners[rank+1]] - ai[owners[rank]];
4957   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4958 
4959   current_space = free_space;
4960 
4961   /* determine symbolic info for each local row */
4962   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4963 
4964   for (k=0; k<merge->nrecv; k++) {
4965     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4966     nrows       = *buf_ri_k[k];
4967     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4968     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4969   }
4970 
4971   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4972   len  = 0;
4973   for (i=0; i<m; i++) {
4974     bnzi = 0;
4975     /* add local non-zero cols of this proc's seqmat into lnk */
4976     arow  = owners[rank] + i;
4977     anzi  = ai[arow+1] - ai[arow];
4978     aj    = a->j + ai[arow];
4979     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4980     bnzi += nlnk;
4981     /* add received col data into lnk */
4982     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4983       if (i == *nextrow[k]) { /* i-th row */
4984         anzi  = *(nextai[k]+1) - *nextai[k];
4985         aj    = buf_rj[k] + *nextai[k];
4986         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4987         bnzi += nlnk;
4988         nextrow[k]++; nextai[k]++;
4989       }
4990     }
4991     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4992 
4993     /* if free space is not available, make more free space */
4994     if (current_space->local_remaining<bnzi) {
4995       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4996       nspacedouble++;
4997     }
4998     /* copy data into free space, then initialize lnk */
4999     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5000     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5001 
5002     current_space->array           += bnzi;
5003     current_space->local_used      += bnzi;
5004     current_space->local_remaining -= bnzi;
5005 
5006     bi[i+1] = bi[i] + bnzi;
5007   }
5008 
5009   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5010 
5011   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
5012   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5013   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5014 
5015   /* create symbolic parallel matrix B_mpi */
5016   /*---------------------------------------*/
5017   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5018   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5019   if (n==PETSC_DECIDE) {
5020     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5021   } else {
5022     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5023   }
5024   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5025   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5026   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5027   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5028   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5029 
5030   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5031   B_mpi->assembled    = PETSC_FALSE;
5032   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5033   merge->bi           = bi;
5034   merge->bj           = bj;
5035   merge->buf_ri       = buf_ri;
5036   merge->buf_rj       = buf_rj;
5037   merge->coi          = NULL;
5038   merge->coj          = NULL;
5039   merge->owners_co    = NULL;
5040 
5041   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5042 
5043   /* attach the supporting struct to B_mpi for reuse */
5044   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5045   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5046   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5047   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5048   *mpimat = B_mpi;
5049 
5050   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5051   PetscFunctionReturn(0);
5052 }
5053 
5054 #undef __FUNCT__
5055 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
5056 /*@C
5057       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
5058                  matrices from each processor
5059 
5060     Collective on MPI_Comm
5061 
5062    Input Parameters:
5063 +    comm - the communicator the parallel matrix will live on
5064 .    seqmat - the input sequential matrix (one per process)
5065 .    m - number of local rows (or PETSC_DECIDE)
5066 .    n - number of local columns (or PETSC_DECIDE)
5067 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5068 
5069    Output Parameter:
5070 .    mpimat - the parallel matrix generated
5071 
5072     Level: advanced
5073 
5074    Notes:
5075      The dimensions of the sequential matrix in each processor MUST be the same.
5076      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5077      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
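
   Example usage sketch (each rank contributes a full-sized sequential matrix seqmat;
   the names are illustrative only):
.vb
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve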
5078 @*/
5079 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5080 {
5081   PetscErrorCode ierr;
5082   PetscMPIInt    size;
5083 
5084   PetscFunctionBegin;
5085   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5086   if (size == 1) {
5087     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5088     if (scall == MAT_INITIAL_MATRIX) {
5089       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5090     } else {
5091       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5092     }
5093     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5094     PetscFunctionReturn(0);
5095   }
5096   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5097   if (scall == MAT_INITIAL_MATRIX) {
5098     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5099   }
5100   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5101   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5102   PetscFunctionReturn(0);
5103 }
5104 
5105 #undef __FUNCT__
5106 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5107 /*@
5108      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5109           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5110           with MatGetSize()
5111 
5112     Not Collective
5113 
5114    Input Parameters:
5115 +    A - the matrix
5116 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5117 
5118    Output Parameter:
5119 .    A_loc - the local sequential matrix generated
5120 
5121     Level: developer
5122 
5123 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5124 
5125 @*/
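/*
   Illustrative sketch (assumes an assembled MPIAIJ matrix A): gather A's local rows into a
   SeqAIJ matrix and refresh it after A's numerical values change.

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ...   the values of A change, nonzero pattern unchanged   ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/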
5126 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5127 {
5128   PetscErrorCode ierr;
5129   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5130   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5131   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5132   MatScalar      *aa=a->a,*ba=b->a,*cam;
5133   PetscScalar    *ca;
5134   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5135   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5136   PetscBool      match;
5137 
5138   PetscFunctionBegin;
5139   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5140   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5141   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5142   if (scall == MAT_INITIAL_MATRIX) {
5143     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5144     ci[0] = 0;
5145     for (i=0; i<am; i++) {
5146       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5147     }
5148     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5149     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5150     k    = 0;
5151     for (i=0; i<am; i++) {
5152       ncols_o = bi[i+1] - bi[i];
5153       ncols_d = ai[i+1] - ai[i];
5154       /* off-diagonal portion of A */
5155       for (jo=0; jo<ncols_o; jo++) {
5156         col = cmap[*bj];
5157         if (col >= cstart) break;
5158         cj[k]   = col; bj++;
5159         ca[k++] = *ba++;
5160       }
5161       /* diagonal portion of A */
5162       for (j=0; j<ncols_d; j++) {
5163         cj[k]   = cstart + *aj++;
5164         ca[k++] = *aa++;
5165       }
5166       /* off-diagonal portion of A */
5167       for (j=jo; j<ncols_o; j++) {
5168         cj[k]   = cmap[*bj++];
5169         ca[k++] = *ba++;
5170       }
5171     }
5172     /* put together the new matrix */
5173     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5174     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5175     /* Since these are PETSc arrays, change flags to free them as necessary. */
5176     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5177     mat->free_a  = PETSC_TRUE;
5178     mat->free_ij = PETSC_TRUE;
5179     mat->nonew   = 0;
5180   } else if (scall == MAT_REUSE_MATRIX) {
5181     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5182     ci = mat->i; cj = mat->j; cam = mat->a;
5183     for (i=0; i<am; i++) {
5184       /* off-diagonal portion of A */
5185       ncols_o = bi[i+1] - bi[i];
5186       for (jo=0; jo<ncols_o; jo++) {
5187         col = cmap[*bj];
5188         if (col >= cstart) break;
5189         *cam++ = *ba++; bj++;
5190       }
5191       /* diagonal portion of A */
5192       ncols_d = ai[i+1] - ai[i];
5193       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5194       /* off-diagonal portion of A */
5195       for (j=jo; j<ncols_o; j++) {
5196         *cam++ = *ba++; bj++;
5197       }
5198     }
5199   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5200   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5201   PetscFunctionReturn(0);
5202 }
5203 
5204 #undef __FUNCT__
5205 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5206 /*@C
5207      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5208 
5209     Not Collective
5210 
5211    Input Parameters:
5212 +    A - the matrix
5213 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5214 -    row, col - index sets of rows and columns to extract (or NULL)
5215 
5216    Output Parameter:
5217 .    A_loc - the local sequential matrix generated
5218 
5219     Level: developer
5220 
5221 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5222 
5223 @*/
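/*
   Illustrative sketch: extract the local rows and only the nonzero columns of an MPIAIJ
   matrix A; passing NULL for row and col lets the routine build the index sets itself.

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/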
5224 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5225 {
5226   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5227   PetscErrorCode ierr;
5228   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5229   IS             isrowa,iscola;
5230   Mat            *aloc;
5231   PetscBool      match;
5232 
5233   PetscFunctionBegin;
5234   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5235   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5236   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5237   if (!row) {
5238     start = A->rmap->rstart; end = A->rmap->rend;
5239     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5240   } else {
5241     isrowa = *row;
5242   }
5243   if (!col) {
5244     start = A->cmap->rstart;
5245     cmap  = a->garray;
5246     nzA   = a->A->cmap->n;
5247     nzB   = a->B->cmap->n;
5248     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5249     ncols = 0;
5250     for (i=0; i<nzB; i++) {
5251       if (cmap[i] < start) idx[ncols++] = cmap[i];
5252       else break;
5253     }
5254     imark = i;
5255     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5256     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5257     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5258   } else {
5259     iscola = *col;
5260   }
5261   if (scall != MAT_INITIAL_MATRIX) {
5262     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5263     aloc[0] = *A_loc;
5264   }
5265   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5266   *A_loc = aloc[0];
5267   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5268   if (!row) {
5269     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5270   }
5271   if (!col) {
5272     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5273   }
5274   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5275   PetscFunctionReturn(0);
5276 }
5277 
5278 #undef __FUNCT__
5279 #define __FUNCT__ "MatGetBrowsOfAcols"
5280 /*@C
5281     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5282 
5283     Collective on Mat
5284 
5285    Input Parameters:
5286 +    A,B - the matrices in mpiaij format
5287 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5288 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5289 
5290    Output Parameter:
5291 +    rowb, colb - index sets of the rows and columns of B that were extracted (created by this routine when scall is MAT_INITIAL_MATRIX)
5292 -    B_seq - the sequential matrix generated
5293 
5294     Level: developer
5295 
5296 @*/
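/*
   Illustrative sketch: fetch the rows of B needed by the nonzero columns of local A, keeping
   the index sets so the call can be repeated with MAT_REUSE_MATRIX after B's values change.

     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ...   the values of B change, nonzero pattern unchanged   ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/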
5297 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5298 {
5299   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5300   PetscErrorCode ierr;
5301   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5302   IS             isrowb,iscolb;
5303   Mat            *bseq=NULL;
5304 
5305   PetscFunctionBegin;
5306   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5307     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5308   }
5309   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5310 
5311   if (scall == MAT_INITIAL_MATRIX) {
5312     start = A->cmap->rstart;
5313     cmap  = a->garray;
5314     nzA   = a->A->cmap->n;
5315     nzB   = a->B->cmap->n;
5316     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5317     ncols = 0;
5318     for (i=0; i<nzB; i++) {  /* row < local row index */
5319       if (cmap[i] < start) idx[ncols++] = cmap[i];
5320       else break;
5321     }
5322     imark = i;
5323     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5324     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5325     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5326     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5327   } else {
5328     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5329     isrowb  = *rowb; iscolb = *colb;
5330     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5331     bseq[0] = *B_seq;
5332   }
5333   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5334   *B_seq = bseq[0];
5335   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5336   if (!rowb) {
5337     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5338   } else {
5339     *rowb = isrowb;
5340   }
5341   if (!colb) {
5342     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5343   } else {
5344     *colb = iscolb;
5345   }
5346   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5347   PetscFunctionReturn(0);
5348 }
5349 
5350 #undef __FUNCT__
5351 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5352 /*
5353     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5354     of the OFF-DIAGONAL portion of local A
5355 
5356     Collective on Mat
5357 
5358    Input Parameters:
5359 +    A,B - the matrices in mpiaij format
5360 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5361 
5362    Output Parameter:
5363 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5364 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5365 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5366 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5367 
5368     Level: developer
5369 
5370 */
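/*
   Communication outline (summary of the implementation below): the VecScatter plan a->Mvctx of A
   is reused to exchange, in three rounds, (1) the row lengths (i-array), (2) the column indices
   (j-array), and (3) the numerical values (a-array) of the rows of B that this process needs;
   with MAT_REUSE_MATRIX only round (3) is repeated, using the offsets and send buffer saved in
   startsj_s, startsj_r and bufa_ptr.
*/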
5371 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5372 {
5373   VecScatter_MPI_General *gen_to,*gen_from;
5374   PetscErrorCode         ierr;
5375   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5376   Mat_SeqAIJ             *b_oth;
5377   VecScatter             ctx =a->Mvctx;
5378   MPI_Comm               comm;
5379   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5380   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5381   PetscScalar            *rvalues,*svalues;
5382   MatScalar              *b_otha,*bufa,*bufA;
5383   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5384   MPI_Request            *rwaits = NULL,*swaits = NULL;
5385   MPI_Status             *sstatus,rstatus;
5386   PetscMPIInt            jj;
5387   PetscInt               *cols,sbs,rbs;
5388   PetscScalar            *vals;
5389 
5390   PetscFunctionBegin;
5391   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5392   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5393     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5394   }
5395   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5396   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5397 
5398   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5399   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5400   rvalues  = gen_from->values; /* holds the length of receiving row */
5401   svalues  = gen_to->values;   /* holds the length of sending row */
5402   nrecvs   = gen_from->n;
5403   nsends   = gen_to->n;
5404 
5405   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5406   srow    = gen_to->indices;    /* local row index to be sent */
5407   sstarts = gen_to->starts;
5408   sprocs  = gen_to->procs;
5409   sstatus = gen_to->sstatus;
5410   sbs     = gen_to->bs;
5411   rstarts = gen_from->starts;
5412   rprocs  = gen_from->procs;
5413   rbs     = gen_from->bs;
5414 
5415   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5416   if (scall == MAT_INITIAL_MATRIX) {
5417     /* i-array */
5418     /*---------*/
5419     /*  post receives */
5420     for (i=0; i<nrecvs; i++) {
5421       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5422       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5423       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5424     }
5425 
5426     /* pack the outgoing message */
5427     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5428 
5429     sstartsj[0] = 0;
5430     rstartsj[0] = 0;
5431     len         = 0; /* total length of j or a array to be sent */
5432     k           = 0;
5433     for (i=0; i<nsends; i++) {
5434       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5435       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5436       for (j=0; j<nrows; j++) {
5437         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5438         for (l=0; l<sbs; l++) {
5439           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5440 
5441           rowlen[j*sbs+l] = ncols;
5442 
5443           len += ncols;
5444           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5445         }
5446         k++;
5447       }
5448       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5449 
5450       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5451     }
5452     /* recvs and sends of i-array are completed */
5453     i = nrecvs;
5454     while (i--) {
5455       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5456     }
5457     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5458 
5459     /* allocate buffers for sending j and a arrays */
5460     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5461     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5462 
5463     /* create i-array of B_oth */
5464     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5465 
5466     b_othi[0] = 0;
5467     len       = 0; /* total length of j or a array to be received */
5468     k         = 0;
5469     for (i=0; i<nrecvs; i++) {
5470       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5471       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5472       for (j=0; j<nrows; j++) {
5473         b_othi[k+1] = b_othi[k] + rowlen[j];
5474         len        += rowlen[j]; k++;
5475       }
5476       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5477     }
5478 
5479     /* allocate space for j and a arrrays of B_oth */
5480     /* allocate space for j and a arrays of B_oth */
5481     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5482 
5483     /* j-array */
5484     /*---------*/
5485     /*  post receives of j-array */
5486     for (i=0; i<nrecvs; i++) {
5487       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5488       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5489     }
5490 
5491     /* pack the outgoing message j-array */
5492     k = 0;
5493     for (i=0; i<nsends; i++) {
5494       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5495       bufJ  = bufj+sstartsj[i];
5496       for (j=0; j<nrows; j++) {
5497         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5498         for (ll=0; ll<sbs; ll++) {
5499           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5500           for (l=0; l<ncols; l++) {
5501             *bufJ++ = cols[l];
5502           }
5503           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5504         }
5505       }
5506       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5507     }
5508 
5509     /* recvs and sends of j-array are completed */
5510     i = nrecvs;
5511     while (i--) {
5512       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5513     }
5514     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5515   } else if (scall == MAT_REUSE_MATRIX) {
5516     sstartsj = *startsj_s;
5517     rstartsj = *startsj_r;
5518     bufa     = *bufa_ptr;
5519     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5520     b_otha   = b_oth->a;
5521   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5522 
5523   /* a-array */
5524   /*---------*/
5525   /*  post receives of a-array */
5526   for (i=0; i<nrecvs; i++) {
5527     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5528     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5529   }
5530 
5531   /* pack the outgoing message a-array */
5532   k = 0;
5533   for (i=0; i<nsends; i++) {
5534     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5535     bufA  = bufa+sstartsj[i];
5536     for (j=0; j<nrows; j++) {
5537       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5538       for (ll=0; ll<sbs; ll++) {
5539         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5540         for (l=0; l<ncols; l++) {
5541           *bufA++ = vals[l];
5542         }
5543         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5544       }
5545     }
5546     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5547   }
5548   /* recvs and sends of a-array are completed */
5549   i = nrecvs;
5550   while (i--) {
5551     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5552   }
5553   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5554   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5555 
5556   if (scall == MAT_INITIAL_MATRIX) {
5557     /* put together the new matrix */
5558     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5559 
5560     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5561     /* Since these are PETSc arrays, change flags to free them as necessary. */
5562     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5563     b_oth->free_a  = PETSC_TRUE;
5564     b_oth->free_ij = PETSC_TRUE;
5565     b_oth->nonew   = 0;
5566 
5567     ierr = PetscFree(bufj);CHKERRQ(ierr);
5568     if (!startsj_s || !bufa_ptr) {
5569       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5570       ierr = PetscFree(bufa);CHKERRQ(ierr);
5571     } else {
5572       *startsj_s = sstartsj;
5573       *startsj_r = rstartsj;
5574       *bufa_ptr  = bufa;
5575     }
5576   }
5577   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5578   PetscFunctionReturn(0);
5579 }
5580 
5581 #undef __FUNCT__
5582 #define __FUNCT__ "MatGetCommunicationStructs"
5583 /*@C
5584   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5585 
5586   Not Collective
5587 
5588   Input Parameters:
5589 . A - The matrix in mpiaij format
5590 
5591   Output Parameter:
5592 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5593 . colmap - A map from global column index to local index into lvec
5594 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5595 
5596   Level: developer
5597 
5598 @*/
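/*
   Illustrative sketch (developer use): inspect the scatter objects that MatMult() on an MPIAIJ
   matrix A uses to gather off-process vector entries; the colmap type depends on PETSC_USE_CTABLE,
   mirroring the prototype below.

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/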
5599 #if defined(PETSC_USE_CTABLE)
5600 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5601 #else
5602 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5603 #endif
5604 {
5605   Mat_MPIAIJ *a;
5606 
5607   PetscFunctionBegin;
5608   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5609   PetscValidPointer(lvec, 2);
5610   PetscValidPointer(colmap, 3);
5611   PetscValidPointer(multScatter, 4);
5612   a = (Mat_MPIAIJ*) A->data;
5613   if (lvec) *lvec = a->lvec;
5614   if (colmap) *colmap = a->colmap;
5615   if (multScatter) *multScatter = a->Mvctx;
5616   PetscFunctionReturn(0);
5617 }
5618 
5619 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5620 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5621 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5622 
5623 #undef __FUNCT__
5624 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5625 /*
5626     Computes (B'*A')' since computing A*B directly is untenable
5627 
5628                n                       p                          p
5629         (              )       (              )         (                  )
5630       m (      A       )  *  n (       B      )   =   m (         C        )
5631         (              )       (              )         (                  )
5632 
5633 */
5634 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5635 {
5636   PetscErrorCode ierr;
5637   Mat            At,Bt,Ct;
5638 
5639   PetscFunctionBegin;
5640   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5641   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5642   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5643   ierr = MatDestroy(&At);CHKERRQ(ierr);
5644   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5645   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5646   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5647   PetscFunctionReturn(0);
5648 }
5649 
5650 #undef __FUNCT__
5651 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5652 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5653 {
5654   PetscErrorCode ierr;
5655   PetscInt       m=A->rmap->n,n=B->cmap->n;
5656   Mat            Cmat;
5657 
5658   PetscFunctionBegin;
5659   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5660   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5661   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5662   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5663   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5664   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5665   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5666   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5667 
5668   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5669 
5670   *C = Cmat;
5671   PetscFunctionReturn(0);
5672 }
5673 
5674 /* ----------------------------------------------------------------*/
5675 #undef __FUNCT__
5676 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5677 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5678 {
5679   PetscErrorCode ierr;
5680 
5681   PetscFunctionBegin;
5682   if (scall == MAT_INITIAL_MATRIX) {
5683     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5684     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5685     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5686   }
5687   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5688   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5689   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5690   PetscFunctionReturn(0);
5691 }
5692 
5693 #if defined(PETSC_HAVE_MUMPS)
5694 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5695 #endif
5696 #if defined(PETSC_HAVE_PASTIX)
5697 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5698 #endif
5699 #if defined(PETSC_HAVE_SUPERLU_DIST)
5700 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5701 #endif
5702 #if defined(PETSC_HAVE_CLIQUE)
5703 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5704 #endif
5705 
5706 /*MC
5707    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5708 
5709    Options Database Keys:
5710 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5711 
5712   Level: beginner
5713 
5714 .seealso: MatCreateAIJ()
5715 M*/
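/*
   Illustrative sketch: the usual way to obtain an MPIAIJ matrix is MatSetType() (or -mat_type
   mpiaij together with MatSetFromOptions()) followed by preallocation; M and N below stand for
   application-chosen global sizes, and the per-row nonzero estimates are made up.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/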
5716 
5717 #undef __FUNCT__
5718 #define __FUNCT__ "MatCreate_MPIAIJ"
5719 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5720 {
5721   Mat_MPIAIJ     *b;
5722   PetscErrorCode ierr;
5723   PetscMPIInt    size;
5724 
5725   PetscFunctionBegin;
5726   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5727 
5728   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5729   B->data       = (void*)b;
5730   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5731   B->assembled  = PETSC_FALSE;
5732   B->insertmode = NOT_SET_VALUES;
5733   b->size       = size;
5734 
5735   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5736 
5737   /* build cache for off array entries formed */
5738   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5739 
5740   b->donotstash  = PETSC_FALSE;
5741   b->colmap      = 0;
5742   b->garray      = 0;
5743   b->roworiented = PETSC_TRUE;
5744 
5745   /* stuff used for matrix vector multiply */
5746   b->lvec  = NULL;
5747   b->Mvctx = NULL;
5748 
5749   /* stuff for MatGetRow() */
5750   b->rowindices   = 0;
5751   b->rowvalues    = 0;
5752   b->getrowactive = PETSC_FALSE;
5753 
5754   /* flexible pointer used in CUSP/CUSPARSE classes */
5755   b->spptr = NULL;
5756 
5757 #if defined(PETSC_HAVE_MUMPS)
5758   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5759 #endif
5760 #if defined(PETSC_HAVE_PASTIX)
5761   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5762 #endif
5763 #if defined(PETSC_HAVE_SUPERLU_DIST)
5764   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5765 #endif
5766 #if defined(PETSC_HAVE_CLIQUE)
5767   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5768 #endif
5769   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5770   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5771   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5772   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5773   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5774   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5775   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5776   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5777   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5778   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5779   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5780   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5781   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5782   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5783   PetscFunctionReturn(0);
5784 }
5785 
5786 #undef __FUNCT__
5787 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5788 /*@
5789      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5790          and "off-diagonal" parts of the matrix in CSR format.
5791 
5792    Collective on MPI_Comm
5793 
5794    Input Parameters:
5795 +  comm - MPI communicator
5796 .  m - number of local rows (Cannot be PETSC_DECIDE)
5797 .  n - This value should be the same as the local size used in creating the
5798        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5799        calculated if N is given). For square matrices n is almost always m.
5800 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5801 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5802 .   i - row indices for "diagonal" portion of matrix
5803 .   j - column indices
5804 .   a - matrix values
5805 .   oi - row indices for "off-diagonal" portion of matrix
5806 .   oj - column indices
5807 -   oa - matrix values
5808 
5809    Output Parameter:
5810 .   mat - the matrix
5811 
5812    Level: advanced
5813 
5814    Notes:
5815        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5816        must free the arrays once the matrix has been destroyed and not before.
5817 
5818        The i and j indices are 0 based
5819 
5820        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5821 
5822        This sets local rows and cannot be used to set off-processor values.
5823 
5824        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5825        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5826        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5827        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5828        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5829        communication if it is known that only local entries will be set.
5830 
5831 .keywords: matrix, aij, compressed row, sparse, parallel
5832 
5833 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5834           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5835 @*/
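/*
   Illustrative sketch (array contents omitted): the caller supplies the local "diagonal" and
   "off-diagonal" blocks as separate CSR triplets; the arrays are not copied and must outlive the
   matrix, as stated in the Notes above. m and n are local sizes chosen by the caller.

     PetscInt    *i,*j,*oi,*oj;      0-based row offsets and column indices
     PetscScalar *a,*oa;             numerical values
     Mat         A;
     ...   fill i,j,a (diagonal block, local column indices) and oi,oj,oa (off-diagonal block, global column indices)   ...
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/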
5836 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5837 {
5838   PetscErrorCode ierr;
5839   Mat_MPIAIJ     *maij;
5840 
5841   PetscFunctionBegin;
5842   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5843   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5844   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5845   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5846   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5847   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5848   maij = (Mat_MPIAIJ*) (*mat)->data;
5849 
5850   (*mat)->preallocated = PETSC_TRUE;
5851 
5852   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5853   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5854 
5855   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5856   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5857 
5858   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5859   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5860   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5861   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5862 
5863   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5864   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5865   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5866   PetscFunctionReturn(0);
5867 }
5868 
5869 /*
5870     Special version for direct calls from Fortran
5871 */
5872 #include <petsc-private/fortranimpl.h>
5873 
5874 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5875 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5876 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5877 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5878 #endif
5879 
5880 /* Change these macros so can be used in void function */
5881 #undef CHKERRQ
5882 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5883 #undef SETERRQ2
5884 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5885 #undef SETERRQ3
5886 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5887 #undef SETERRQ
5888 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5889 
5890 #undef __FUNCT__
5891 #define __FUNCT__ "matsetvaluesmpiaij_"
5892 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5893 {
5894   Mat            mat  = *mmat;
5895   PetscInt       m    = *mm, n = *mn;
5896   InsertMode     addv = *maddv;
5897   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5898   PetscScalar    value;
5899   PetscErrorCode ierr;
5900 
5901   MatCheckPreallocated(mat,1);
5902   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5903 
5904 #if defined(PETSC_USE_DEBUG)
5905   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5906 #endif
5907   {
5908     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5909     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5910     PetscBool roworiented = aij->roworiented;
5911 
5912     /* Some Variables required in the macro */
5913     Mat        A                 = aij->A;
5914     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5915     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5916     MatScalar  *aa               = a->a;
5917     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5918     Mat        B                 = aij->B;
5919     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5920     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5921     MatScalar  *ba               = b->a;
5922 
5923     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5924     PetscInt  nonew = a->nonew;
5925     MatScalar *ap1,*ap2;
5926 
5927     PetscFunctionBegin;
5928     for (i=0; i<m; i++) {
5929       if (im[i] < 0) continue;
5930 #if defined(PETSC_USE_DEBUG)
5931       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5932 #endif
5933       if (im[i] >= rstart && im[i] < rend) {
5934         row      = im[i] - rstart;
5935         lastcol1 = -1;
5936         rp1      = aj + ai[row];
5937         ap1      = aa + ai[row];
5938         rmax1    = aimax[row];
5939         nrow1    = ailen[row];
5940         low1     = 0;
5941         high1    = nrow1;
5942         lastcol2 = -1;
5943         rp2      = bj + bi[row];
5944         ap2      = ba + bi[row];
5945         rmax2    = bimax[row];
5946         nrow2    = bilen[row];
5947         low2     = 0;
5948         high2    = nrow2;
5949 
5950         for (j=0; j<n; j++) {
5951           if (roworiented) value = v[i*n+j];
5952           else value = v[i+j*m];
5953           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5954           if (in[j] >= cstart && in[j] < cend) {
5955             col = in[j] - cstart;
5956             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5957           } else if (in[j] < 0) continue;
5958 #if defined(PETSC_USE_DEBUG)
5959           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5960 #endif
5961           else {
5962             if (mat->was_assembled) {
5963               if (!aij->colmap) {
5964                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5965               }
5966 #if defined(PETSC_USE_CTABLE)
5967               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5968               col--;
5969 #else
5970               col = aij->colmap[in[j]] - 1;
5971 #endif
5972               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5973                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5974                 col  =  in[j];
5975                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5976                 B     = aij->B;
5977                 b     = (Mat_SeqAIJ*)B->data;
5978                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5979                 rp2   = bj + bi[row];
5980                 ap2   = ba + bi[row];
5981                 rmax2 = bimax[row];
5982                 nrow2 = bilen[row];
5983                 low2  = 0;
5984                 high2 = nrow2;
5985                 bm    = aij->B->rmap->n;
5986                 ba    = b->a;
5987               }
5988             } else col = in[j];
5989             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5990           }
5991         }
5992       } else if (!aij->donotstash) {
5993         if (roworiented) {
5994           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5995         } else {
5996           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5997         }
5998       }
5999     }
6000   }
6001   PetscFunctionReturnVoid();
6002 }
6003 
6004