xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f9426fe092dba0ba2fdf65dfec8d938c4b10a31c)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
20    enough of them exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
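/*
   Illustrative preallocation sketch (not part of this file; comm, m and n are placeholder
   names and the per-row counts 5 and 2 are made-up values).  Calling both the Seq and MPI
   preallocation routines, as recommended above, keeps the code correct for any communicator
   size, since the routine that does not match the actual type is ignored.  Error checking
   with ierr/CHKERRQ() is omitted for brevity.

     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATAIJ);
     MatSetFromOptions(A);
     MatSeqAIJSetPreallocation(A,5,NULL);            used when comm has a single process
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);     used when comm has multiple processes
*/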
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
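/*
   Run-time selection sketch (illustrative only; myapp is a hypothetical executable).
   Because the type string is "aijcrl", an application that calls MatSetFromOptions()
   can switch to this format from the command line without any source change:

     ./myapp -mat_type aijcrl

   or select it explicitly in code with MatSetType(A,MATAIJCRL).
*/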
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
138   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
139   if (type == NORM_2) {
140     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142     }
143     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145     }
146   } else if (type == NORM_1) {
147     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149     }
150     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152     }
153   } else if (type == NORM_INFINITY) {
154     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156     }
157     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159     }
160 
161   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162   if (type == NORM_INFINITY) {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
164   } else {
165     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
166   }
167   ierr = PetscFree(work);CHKERRQ(ierr);
168   if (type == NORM_2) {
169     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDistribute_MPIAIJ"
176 /*
177     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179 
180     Only for square matrices
181 
182     Used by a preconditioner, hence PETSC_EXTERN
183 */
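/*
   Illustrative call sequence (a sketch with hypothetical names dist and mlocal; error
   checking omitted).  On the first call the distributed matrix is created; on subsequent
   calls with MAT_REUSE_MATRIX only the numerical values are moved over from process 0,
   where gmat is the sequential matrix.

     Mat dist = NULL;
     MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&dist);
     ....  change numerical values of gmat on process 0  ....
     MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&dist);
*/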
184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185 {
186   PetscMPIInt    rank,size;
187   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
188   PetscErrorCode ierr;
189   Mat            mat;
190   Mat_SeqAIJ     *gmata;
191   PetscMPIInt    tag;
192   MPI_Status     status;
193   PetscBool      aij;
194   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
195 
196   PetscFunctionBegin;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* determine diagonal and off-diagonal nonzero counts */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* determine diagonal and off-diagonal nonzero counts */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0*/
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /*
352   Local utility routine that creates a mapping from the global column
353 number to the local number in the off-diagonal part of the local
354 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
355 a slightly higher hash table cost; without it, it is not scalable (each process
356 stores an order-N integer array) but access is fast.
357 */
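/*
   Small worked example (illustrative only; the column numbers 3, 7 and 12 are made up):
   suppose the off-diagonal block on a process has global columns garray = {3,7,12}.
   After this routine runs, looking up global column 7 yields local index 1; the stored
   values are shifted by +1 so that 0 can mean "column not present".

     PetscInt loc;
   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,7+1,&loc); loc--;       loc is now 1
   #else
     loc = aij->colmap[7] - 1;                          loc is now 1
   #endif
*/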
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361 {
362   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       n = aij->B->cmap->n,i;
365 
366   PetscFunctionBegin;
367   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368 #if defined(PETSC_USE_CTABLE)
369   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
370   for (i=0; i<n; i++) {
371     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
372   }
373 #else
374   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
375   ierr = PetscLogObjectMemory((PetscObject)mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378 #endif
379   PetscFunctionReturn(0);
380 }
381 
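/*
   The two macros below insert (or add to) a single value in one row of the local
   diagonal (A) or off-diagonal (B) SeqAIJ block: a short binary search narrows the
   search interval, a linear scan locates the column, and if the column is not yet
   present the later entries of the row are shifted up to make room (reallocating
   the row storage when necessary).
*/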
382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383 { \
384     if (col <= lastcol1)  low1 = 0;     \
385     else                 high1 = nrow1; \
386     lastcol1 = col;\
387     while (high1-low1 > 5) { \
388       t = (low1+high1)/2; \
389       if (rp1[t] > col) high1 = t; \
390       else              low1  = t; \
391     } \
392       for (_i=low1; _i<high1; _i++) { \
393         if (rp1[_i] > col) break; \
394         if (rp1[_i] == col) { \
395           if (addv == ADD_VALUES) ap1[_i] += value;   \
396           else                    ap1[_i] = value; \
397           goto a_noinsert; \
398         } \
399       }  \
400       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
402       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404       N = nrow1++ - 1; a->nz++; high1++; \
405       /* shift up all the later entries in this row */ \
406       for (ii=N; ii>=_i; ii--) { \
407         rp1[ii+1] = rp1[ii]; \
408         ap1[ii+1] = ap1[ii]; \
409       } \
410       rp1[_i] = col;  \
411       ap1[_i] = value;  \
412       a_noinsert: ; \
413       ailen[row] = nrow1; \
414 }
415 
416 
417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418   { \
419     if (col <= lastcol2) low2 = 0;                        \
420     else high2 = nrow2;                                   \
421     lastcol2 = col;                                       \
422     while (high2-low2 > 5) {                              \
423       t = (low2+high2)/2;                                 \
424       if (rp2[t] > col) high2 = t;                        \
425       else             low2  = t;                         \
426     }                                                     \
427     for (_i=low2; _i<high2; _i++) {                       \
428       if (rp2[_i] > col) break;                           \
429       if (rp2[_i] == col) {                               \
430         if (addv == ADD_VALUES) ap2[_i] += value;         \
431         else                    ap2[_i] = value;          \
432         goto b_noinsert;                                  \
433       }                                                   \
434     }                                                     \
435     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
437     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439     N = nrow2++ - 1; b->nz++; high2++;                    \
440     /* shift up all the later entries in this row */      \
441     for (ii=N; ii>=_i; ii--) {                            \
442       rp2[ii+1] = rp2[ii];                                \
443       ap2[ii+1] = ap2[ii];                                \
444     }                                                     \
445     rp2[_i] = col;                                        \
446     ap2[_i] = value;                                      \
447     b_noinsert: ;                                         \
448     bilen[row] = nrow2;                                   \
449   }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454 {
455   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
456   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
457   PetscErrorCode ierr;
458   PetscInt       l,*garray = mat->garray,diag;
459 
460   PetscFunctionBegin;
461   /* code only works for square matrices A */
462 
463   /* find size of row to the left of the diagonal part */
464   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
465   row  = row - diag;
466   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467     if (garray[b->j[b->i[row]+l]] > diag) break;
468   }
469   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* diagonal part */
472   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
473 
474   /* right of diagonal part */
475   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatSetValues_MPIAIJ"
481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   PetscScalar    value;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
487   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488   PetscBool      roworiented = aij->roworiented;
489 
490   /* Some Variables required in the macro */
491   Mat        A                 = aij->A;
492   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
493   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494   MatScalar  *aa               = a->a;
495   PetscBool  ignorezeroentries = a->ignorezeroentries;
496   Mat        B                 = aij->B;
497   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
498   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499   MatScalar  *ba               = b->a;
500 
501   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502   PetscInt  nonew;
503   MatScalar *ap1,*ap2;
504 
505   PetscFunctionBegin;
506   if (v) PetscValidScalarPointer(v,6);
507   for (i=0; i<m; i++) {
508     if (im[i] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511 #endif
512     if (im[i] >= rstart && im[i] < rend) {
513       row      = im[i] - rstart;
514       lastcol1 = -1;
515       rp1      = aj + ai[row];
516       ap1      = aa + ai[row];
517       rmax1    = aimax[row];
518       nrow1    = ailen[row];
519       low1     = 0;
520       high1    = nrow1;
521       lastcol2 = -1;
522       rp2      = bj + bi[row];
523       ap2      = ba + bi[row];
524       rmax2    = bimax[row];
525       nrow2    = bilen[row];
526       low2     = 0;
527       high2    = nrow2;
528 
529       for (j=0; j<n; j++) {
530         if (v) {
531           if (roworiented) value = v[i*n+j];
532           else             value = v[i+j*m];
533         } else value = 0.0;
534         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535         if (in[j] >= cstart && in[j] < cend) {
536           col   = in[j] - cstart;
537           nonew = a->nonew;
538           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539         } else if (in[j] < 0) continue;
540 #if defined(PETSC_USE_DEBUG)
541         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542 #endif
543         else {
544           if (mat->was_assembled) {
545             if (!aij->colmap) {
546               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
547             }
548 #if defined(PETSC_USE_CTABLE)
549             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
550             col--;
551 #else
552             col = aij->colmap[in[j]] - 1;
553 #endif
554             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
556               col  =  in[j];
557               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558               B     = aij->B;
559               b     = (Mat_SeqAIJ*)B->data;
560               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561               rp2   = bj + bi[row];
562               ap2   = ba + bi[row];
563               rmax2 = bimax[row];
564               nrow2 = bilen[row];
565               low2  = 0;
566               high2 = nrow2;
567               bm    = aij->B->rmap->n;
568               ba    = b->a;
569             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570           } else col = in[j];
571           nonew = b->nonew;
572           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573         }
574       }
575     } else {
576       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577       if (!aij->donotstash) {
578         mat->assembled = PETSC_FALSE;
579         if (roworiented) {
580           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         } else {
582           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
583         }
584       }
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatGetValues_MPIAIJ"
592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593 {
594   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
595   PetscErrorCode ierr;
596   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
598 
599   PetscFunctionBegin;
600   for (i=0; i<m; i++) {
601     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603     if (idxm[i] >= rstart && idxm[i] < rend) {
604       row = idxm[i] - rstart;
605       for (j=0; j<n; j++) {
606         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608         if (idxn[j] >= cstart && idxn[j] < cend) {
609           col  = idxn[j] - cstart;
610           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611         } else {
612           if (!aij->colmap) {
613             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
614           }
615 #if defined(PETSC_USE_CTABLE)
616           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
617           col--;
618 #else
619           col = aij->colmap[idxn[j]] - 1;
620 #endif
621           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622           else {
623             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
624           }
625         }
626       }
627     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       nstash,reallocs;
641   InsertMode     addv;
642 
643   PetscFunctionBegin;
644   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
645 
646   /* make sure all processors are either in INSERTMODE or ADDMODE */
647   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649   mat->insertmode = addv; /* in case this processor had no cache */
650 
651   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
652   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
653   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
663   PetscErrorCode ierr;
664   PetscMPIInt    n;
665   PetscInt       i,j,rstart,ncols,flg;
666   PetscInt       *row,*col;
667   PetscBool      other_disassembled;
668   PetscScalar    *val;
669   InsertMode     addv = mat->insertmode;
670 
671   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
672 
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n; ) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) {
682           if (row[j] != rstart) break;
683         }
684         if (j < n) ncols = j-i;
685         else       ncols = n-i;
686         /* Now assemble all these values with a single function call */
687         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
688 
689         i = j;
690       }
691     }
692     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
693   }
694   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
695   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
696 
697   /* determine if any processor has disassembled, if so we must
698      also disassemble ourselves, in order that we may reassemble. */
699   /*
700      if the nonzero structure of submatrix B cannot change then we know that
701      no processor has disassembled, thus we can skip this step
702   */
703   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
705     if (mat->was_assembled && !other_disassembled) {
706       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
707     }
708   }
709   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
711   }
712   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
714   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
715   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
716 
717   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
718 
719   aij->rowvalues = 0;
720 
721   /* used by MatAXPY() */
722   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
723   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
724 
725   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
726   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
733 {
734   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
739   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatZeroRows_MPIAIJ"
745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
746 {
747   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
750   PetscInt          i,*owners = A->rmap->range;
751   PetscInt          *nprocs,j,idx,nsends,row;
752   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
753   PetscInt          *rvalues,count,base,slen,*source;
754   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
755   MPI_Comm          comm;
756   MPI_Request       *send_waits,*recv_waits;
757   MPI_Status        recv_status,*send_status;
758   const PetscScalar *xx;
759   PetscScalar       *bb;
760 #if defined(PETSC_DEBUG)
761   PetscBool found = PETSC_FALSE;
762 #endif
763 
764   PetscFunctionBegin;
765   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
766   /*  first count number of contributors to each processor */
767   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
768   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
769   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
770   j    = 0;
771   for (i=0; i<N; i++) {
772     if (lastidx > (idx = rows[i])) j = 0;
773     lastidx = idx;
774     for (; j<size; j++) {
775       if (idx >= owners[j] && idx < owners[j+1]) {
776         nprocs[2*j]++;
777         nprocs[2*j+1] = 1;
778         owner[i]      = j;
779 #if defined(PETSC_DEBUG)
780         found = PETSC_TRUE;
781 #endif
782         break;
783       }
784     }
785 #if defined(PETSC_DEBUG)
786     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
787     found = PETSC_FALSE;
788 #endif
789   }
790   nsends = 0;
791   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
792 
793   if (A->nooffproczerorows) {
794     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
795     nrecvs = nsends;
796     nmax   = N;
797   } else {
798     /* inform other processors of number of messages and max length*/
799     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
800   }
801 
802   /* post receives:   */
803   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
804   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
805   for (i=0; i<nrecvs; i++) {
806     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
807   }
808 
809   /* do sends:
810       1) starts[i] gives the starting index in svalues for stuff going to
811          the ith processor
812   */
813   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
814   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
815   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
816 
817   starts[0] = 0;
818   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
819   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
820 
821   starts[0] = 0;
822   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
823   count = 0;
824   for (i=0; i<size; i++) {
825     if (nprocs[2*i+1]) {
826       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
827     }
828   }
829   ierr = PetscFree(starts);CHKERRQ(ierr);
830 
831   base = owners[rank];
832 
833   /*  wait on receives */
834   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
835   count = nrecvs; slen = 0;
836   while (count) {
837     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
838     /* unpack receives into our local space */
839     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
840 
841     source[imdex] = recv_status.MPI_SOURCE;
842     lens[imdex]   = n;
843     slen         += n;
844     count--;
845   }
846   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
847 
848   /* move the data into the send scatter */
849   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
850   count = 0;
851   for (i=0; i<nrecvs; i++) {
852     values = rvalues + i*nmax;
853     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
854   }
855   ierr = PetscFree(rvalues);CHKERRQ(ierr);
856   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
857   ierr = PetscFree(owner);CHKERRQ(ierr);
858   ierr = PetscFree(nprocs);CHKERRQ(ierr);
859 
860   /* fix right hand side if needed */
861   if (x && b) {
862     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
864     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
865     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
866     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
867   }
868   /*
869         Zero the required rows. If the "diagonal block" of the matrix
870      is square and the user wishes to set the diagonal, we use separate
871      code so that MatSetValues() is not called for each diagonal entry,
872      which would allocate new memory each time and slow things down.
873
874   */
875   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
876   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
877   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
878     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
879   } else if (diag != 0.0) {
880     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
881     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
882     for (i = 0; i < slen; i++) {
883       row  = lrows[i] + rstart;
884       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
885     }
886     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   } else {
889     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
890   }
891   ierr = PetscFree(lrows);CHKERRQ(ierr);
892 
893   /* wait on sends */
894   if (nsends) {
895     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
896     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
897     ierr = PetscFree(send_status);CHKERRQ(ierr);
898   }
899   ierr = PetscFree(send_waits);CHKERRQ(ierr);
900   ierr = PetscFree(svalues);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
909   PetscErrorCode    ierr;
910   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
911   PetscInt          i,*owners = A->rmap->range;
912   PetscInt          *nprocs,j,idx,nsends;
913   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
914   PetscInt          *rvalues,count,base,slen,*source;
915   PetscInt          *lens,*lrows,*values,m;
916   MPI_Comm          comm;
917   MPI_Request       *send_waits,*recv_waits;
918   MPI_Status        recv_status,*send_status;
919   const PetscScalar *xx;
920   PetscScalar       *bb,*mask;
921   Vec               xmask,lmask;
922   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
923   const PetscInt    *aj, *ii,*ridx;
924   PetscScalar       *aa;
925 #if defined(PETSC_DEBUG)
926   PetscBool found = PETSC_FALSE;
927 #endif
928 
929   PetscFunctionBegin;
930   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
931   /*  first count number of contributors to each processor */
932   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
933   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
934   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
935   j    = 0;
936   for (i=0; i<N; i++) {
937     if (lastidx > (idx = rows[i])) j = 0;
938     lastidx = idx;
939     for (; j<size; j++) {
940       if (idx >= owners[j] && idx < owners[j+1]) {
941         nprocs[2*j]++;
942         nprocs[2*j+1] = 1;
943         owner[i]      = j;
944 #if defined(PETSC_DEBUG)
945         found = PETSC_TRUE;
946 #endif
947         break;
948       }
949     }
950 #if defined(PETSC_DEBUG)
951     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
952     found = PETSC_FALSE;
953 #endif
954   }
955   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
956 
957   /* inform other processors of number of messages and max length*/
958   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
959 
960   /* post receives:   */
961   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
963   for (i=0; i<nrecvs; i++) {
964     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
965   }
966 
967   /* do sends:
968       1) starts[i] gives the starting index in svalues for stuff going to
969          the ith processor
970   */
971   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
972   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
973   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
974 
975   starts[0] = 0;
976   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
977   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
978 
979   starts[0] = 0;
980   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
981   count = 0;
982   for (i=0; i<size; i++) {
983     if (nprocs[2*i+1]) {
984       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
985     }
986   }
987   ierr = PetscFree(starts);CHKERRQ(ierr);
988 
989   base = owners[rank];
990 
991   /*  wait on receives */
992   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
993   count = nrecvs; slen = 0;
994   while (count) {
995     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
996     /* unpack receives into our local space */
997     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
998 
999     source[imdex] = recv_status.MPI_SOURCE;
1000     lens[imdex]   = n;
1001     slen         += n;
1002     count--;
1003   }
1004   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1005 
1006   /* move the data into the send scatter */
1007   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1008   count = 0;
1009   for (i=0; i<nrecvs; i++) {
1010     values = rvalues + i*nmax;
1011     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1012   }
1013   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1014   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1015   ierr = PetscFree(owner);CHKERRQ(ierr);
1016   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1017   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1018 
1019   /* zero diagonal part of matrix */
1020   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1021 
1022   /* handle off diagonal part of matrix */
1023   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x) {
1032     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1035     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1036   }
1037   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1038 
1039   /* remove zeroed rows of off diagonal matrix */
1040   ii = aij->i;
1041   for (i=0; i<slen; i++) {
1042     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1043   }
1044 
1045   /* loop over all elements of off process part of matrix zeroing removed columns*/
1046   if (aij->compressedrow.use) {
1047     m    = aij->compressedrow.nrows;
1048     ii   = aij->compressedrow.i;
1049     ridx = aij->compressedrow.rindex;
1050     for (i=0; i<m; i++) {
1051       n  = ii[i+1] - ii[i];
1052       aj = aij->j + ii[i];
1053       aa = aij->a + ii[i];
1054 
1055       for (j=0; j<n; j++) {
1056         if (PetscAbsScalar(mask[*aj])) {
1057           if (b) bb[*ridx] -= *aa*xx[*aj];
1058           *aa = 0.0;
1059         }
1060         aa++;
1061         aj++;
1062       }
1063       ridx++;
1064     }
1065   } else { /* do not use compressed row format */
1066     m = l->B->rmap->n;
1067     for (i=0; i<m; i++) {
1068       n  = ii[i+1] - ii[i];
1069       aj = aij->j + ii[i];
1070       aa = aij->a + ii[i];
1071       for (j=0; j<n; j++) {
1072         if (PetscAbsScalar(mask[*aj])) {
1073           if (b) bb[i] -= *aa*xx[*aj];
1074           *aa = 0.0;
1075         }
1076         aa++;
1077         aj++;
1078       }
1079     }
1080   }
1081   if (x) {
1082     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1083     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1084   }
1085   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1086   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1087   ierr = PetscFree(lrows);CHKERRQ(ierr);
1088 
1089   /* wait on sends */
1090   if (nsends) {
1091     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1092     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1093     ierr = PetscFree(send_status);CHKERRQ(ierr);
1094   }
1095   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1096   ierr = PetscFree(svalues);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added in yy until the next line */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1178 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1179 {
1180   MPI_Comm       comm;
1181   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1182   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1183   IS             Me,Notme;
1184   PetscErrorCode ierr;
1185   PetscInt       M,N,first,last,*notme,i;
1186   PetscMPIInt    size;
1187 
1188   PetscFunctionBegin;
1189   /* Easy test: symmetric diagonal block */
1190   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1191   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1192   if (!*f) PetscFunctionReturn(0);
1193   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1195   if (size == 1) PetscFunctionReturn(0);
1196 
1197   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1198   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1199   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1200   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1201   for (i=0; i<first; i++) notme[i] = i;
1202   for (i=last; i<M; i++) notme[i-last+first] = i;
1203   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1204   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1205   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1206   Aoff = Aoffs[0];
1207   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1208   Boff = Boffs[0];
1209   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1211   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1213   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1214   ierr = PetscFree(notme);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1221 {
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   /* do nondiagonal part */
1227   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1228   /* send it on its way */
1229   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   /* do local part */
1231   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1232   /* receive remote parts */
1233   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 /*
1238   This only works correctly for square matrices where the subblock A->A is the
1239    diagonal block
1240 */
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1244 {
1245   PetscErrorCode ierr;
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247 
1248   PetscFunctionBegin;
1249   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1250   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1251   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatScale_MPIAIJ"
1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1258 {
1259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1264   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDestroy_MPIAIJ"
1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1271 {
1272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276 #if defined(PETSC_USE_LOG)
1277   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1278 #endif
1279   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1280   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1281   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1282   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1283 #if defined(PETSC_USE_CTABLE)
1284   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1285 #else
1286   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1287 #endif
1288   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1289   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1290   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1291   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1292   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1293   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1294 
1295   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1310 {
1311   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1312   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1313   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1314   PetscErrorCode ierr;
1315   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1316   int            fd;
1317   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1318   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1319   PetscScalar    *column_values;
1320   PetscInt       message_count,flowcontrolcount;
1321   FILE           *file;
1322 
1323   PetscFunctionBegin;
1324   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1325   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1326   nz   = A->nz + B->nz;
1327   if (!rank) {
1328     header[0] = MAT_FILE_CLASSID;
1329     header[1] = mat->rmap->N;
1330     header[2] = mat->cmap->N;
1331 
1332     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1333     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1334     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     /* get largest number of rows any processor has */
1336     rlen  = mat->rmap->n;
1337     range = mat->rmap->range;
1338     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1339   } else {
1340     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341     rlen = mat->rmap->n;
1342   }
1343 
1344   /* load up the local row counts */
1345   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1346   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1347 
1348   /* store the row lengths to the file */
1349   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1350   if (!rank) {
1351     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     for (i=1; i<size; i++) {
1353       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1354       rlen = range[i+1] - range[i];
1355       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1356       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1357     }
1358     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1359   } else {
1360     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1361     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1362     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1363   }
1364   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1365 
1366   /* load up the local column indices */
1367   nzmax = nz; /* process 0 needs as much space as the largest number of nonzeros on any process */
1368   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1369   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1370   cnt   = 0;
1371   for (i=0; i<mat->rmap->n; i++) {
1372     for (j=B->i[i]; j<B->i[i+1]; j++) {
1373       if ((col = garray[B->j[j]]) > cstart) break;
1374       column_indices[cnt++] = col;
1375     }
1376     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1377     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1378   }
1379   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1380 
1381   /* store the column indices to the file */
1382   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1383   if (!rank) {
1384     MPI_Status status;
1385     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1386     for (i=1; i<size; i++) {
1387       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1388       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1389       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1390       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1392     }
1393     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1394   } else {
1395     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1396     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1399   }
1400   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1401 
1402   /* load up the local column values */
1403   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1404   cnt  = 0;
1405   for (i=0; i<mat->rmap->n; i++) {
1406     for (j=B->i[i]; j<B->i[i+1]; j++) {
1407       if (garray[B->j[j]] > cstart) break;
1408       column_values[cnt++] = B->a[j];
1409     }
1410     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1411     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1412   }
1413   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1414 
1415   /* store the column values to the file */
1416   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1417   if (!rank) {
1418     MPI_Status status;
1419     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1420     for (i=1; i<size; i++) {
1421       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1422       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1423       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1424       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1426     }
1427     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1428   } else {
1429     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1430     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1431     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1432     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1433   }
1434   ierr = PetscFree(column_values);CHKERRQ(ierr);
1435 
1436   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1437   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1438   PetscFunctionReturn(0);
1439 }
1440 
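/*
   The ASCII formats handled below are normally requested through the viewer, e.g.
   (a minimal sketch; PETSC_VIEWER_STDOUT_WORLD is the usual shared ASCII viewer):

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   PETSC_VIEWER_ASCII_INFO prints a summary; PETSC_VIEWER_ASCII_INFO_DETAIL adds
   per-process nonzero counts and the VecScatter used for matrix-vector products.
*/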
1441 #include <petscdraw.h>
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1445 {
1446   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1447   PetscErrorCode    ierr;
1448   PetscMPIInt       rank = aij->rank,size = aij->size;
1449   PetscBool         isdraw,iascii,isbinary;
1450   PetscViewer       sviewer;
1451   PetscViewerFormat format;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1457   if (iascii) {
1458     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1459     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1460       MatInfo   info;
1461       PetscBool inodes;
1462 
1463       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1464       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1465       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1467       if (!inodes) {
1468         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1469                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1472                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1473       }
1474       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1475       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1476       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1478       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1480       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1481       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1482       PetscFunctionReturn(0);
1483     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1484       PetscInt inodecount,inodelimit,*inodes;
1485       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1486       if (inodes) {
1487         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1488       } else {
1489         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1490       }
1491       PetscFunctionReturn(0);
1492     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493       PetscFunctionReturn(0);
1494     }
1495   } else if (isbinary) {
1496     if (size == 1) {
1497       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499     } else {
1500       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1501     }
1502     PetscFunctionReturn(0);
1503   } else if (isdraw) {
1504     PetscDraw draw;
1505     PetscBool isnull;
1506     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1507     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1508   }
1509 
1510   if (size == 1) {
1511     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1512     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1513   } else {
1514     /* assemble the entire matrix onto first processor. */
1515     Mat        A;
1516     Mat_SeqAIJ *Aloc;
1517     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1518     MatScalar  *a;
1519 
1520     if (mat->rmap->N > 1024) {
1521       PetscBool flg = PETSC_FALSE;
1522 
1523       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1524       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1525     }
1526 
1527     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1528     if (!rank) {
1529       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1530     } else {
1531       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1532     }
1533     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1534     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1535     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1536     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1537     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1538 
1539     /* copy over the A part */
1540     Aloc = (Mat_SeqAIJ*)aij->A->data;
1541     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1544     for (i=0; i<m; i++) {
1545       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1546       row++;
1547       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1548     }
1549     aj = Aloc->j;
1550     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1551 
1552     /* copy over the B part */
1553     Aloc = (Mat_SeqAIJ*)aij->B->data;
1554     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555     row  = mat->rmap->rstart;
1556     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1557     ct   = cols;
1558     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1559     for (i=0; i<m; i++) {
1560       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1561       row++;
1562       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1563     }
1564     ierr = PetscFree(ct);CHKERRQ(ierr);
1565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567     /*
1568        Everyone has to make this call to draw the matrix, since the graphics waits are
1569        synchronized across all processes that share the PetscDraw object
1570     */
1571     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1572     if (!rank) {
1573       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1574       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1575       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1576       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1577     }
1578     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1579     ierr = MatDestroy(&A);CHKERRQ(ierr);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatView_MPIAIJ"
1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1587 {
1588   PetscErrorCode ierr;
1589   PetscBool      iascii,isdraw,issocket,isbinary;
1590 
1591   PetscFunctionBegin;
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1596   if (iascii || isdraw || isbinary || issocket) {
1597     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
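/*
   The local SOR variants below split the matrix as M = A + B, where A is the
   on-process (diagonal) block and B holds the off-process columns.  Each outer
   iteration moves the off-process coupling to the right-hand side,

       bb1 = bb - B*x_ghost,

   and then runs the requested sequential sweep on A with bb1.  A typical call
   (a minimal sketch; omega = 1.0, one outer and one local iteration):

     ierr = MatSOR(mat,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/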
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatSOR_MPIAIJ"
1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1605 {
1606   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1607   PetscErrorCode ierr;
1608   Vec            bb1 = 0;
1609   PetscBool      hasop;
1610 
1611   PetscFunctionBegin;
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1618     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1619   }
1620 
1621   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626 
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1655     if (flag & SOR_ZERO_INITIAL_GUESS) {
1656       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1657       its--;
1658     }
1659     while (its--) {
1660       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662 
1663       /* update rhs: bb1 = bb - B*x */
1664       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1665       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1666 
1667       /* local sweep */
1668       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1669     }
1670   } else if (flag & SOR_EISENSTAT) {
1671     Vec xx1;
1672 
1673     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1675 
1676     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     if (!mat->diag) {
1679       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1680       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1681     }
1682     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1683     if (hasop) {
1684       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1685     } else {
1686       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1687     }
1688     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1689 
1690     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1691 
1692     /* local sweep */
1693     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1694     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1695     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1696   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1697 
1698   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
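/*
   MatPermute_MPIAIJ() uses a PetscSF to invert the row and column permutations in
   parallel: each process scatters its own global indices to the processes that own
   the permuted positions, so rdest[i] and cdest[i] give the destination of local
   row and column i.  A usage sketch (rowp and colp are assumed to describe a global
   permutation distributed conformally with A):

     Mat Aperm;
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/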
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatPermute_MPIAIJ"
1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1705 {
1706   Mat            aA,aB,Aperm;
1707   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1708   PetscScalar    *aa,*ba;
1709   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1710   PetscSF        rowsf,sf;
1711   IS             parcolp = NULL;
1712   PetscBool      done;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1717   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1718   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1719   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1720 
1721   /* Invert row permutation to find out where my rows should go */
1722   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1723   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1724   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1725   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1726   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1727   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1728 
1729   /* Invert column permutation to find out where my columns should go */
1730   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1731   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1732   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1733   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1734   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1735   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1736   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1737 
1738   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1739   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1740   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1741 
1742   /* Find out where my gcols should go */
1743   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1744   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1745   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1746   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1747   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1748   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1749   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1750   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1751 
1752   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1753   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1756   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1757   for (i=0; i<m; i++) {
1758     PetscInt row = rdest[i],rowner;
1759     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1760     for (j=ai[i]; j<ai[i+1]; j++) {
1761       PetscInt cowner,col = cdest[aj[j]];
1762       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1763       if (rowner == cowner) dnnz[i]++;
1764       else onnz[i]++;
1765     }
1766     for (j=bi[i]; j<bi[i+1]; j++) {
1767       PetscInt cowner,col = gcdest[bj[j]];
1768       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1769       if (rowner == cowner) dnnz[i]++;
1770       else onnz[i]++;
1771     }
1772   }
1773   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1774   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1778   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1779   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1780 
1781   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1782   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1783   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1784   for (i=0; i<m; i++) {
1785     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1786     PetscInt rowlen;
1787     rowlen = ai[i+1] - ai[i];
1788     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1789     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1790     rowlen = bi[i+1] - bi[i];
1791     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1792     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1793   }
1794   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1797   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1798   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1799   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1800   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1801   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1802   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1803   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1804   *B = Aperm;
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1811 {
1812   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1813   Mat            A    = mat->A,B = mat->B;
1814   PetscErrorCode ierr;
1815   PetscReal      isend[5],irecv[5];
1816 
1817   PetscFunctionBegin;
1818   info->block_size = 1.0;
1819   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1820 
1821   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1822   isend[3] = info->memory;  isend[4] = info->mallocs;
1823 
1824   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1825 
1826   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1827   isend[3] += info->memory;  isend[4] += info->mallocs;
1828   if (flag == MAT_LOCAL) {
1829     info->nz_used      = isend[0];
1830     info->nz_allocated = isend[1];
1831     info->nz_unneeded  = isend[2];
1832     info->memory       = isend[3];
1833     info->mallocs      = isend[4];
1834   } else if (flag == MAT_GLOBAL_MAX) {
1835     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1836 
1837     info->nz_used      = irecv[0];
1838     info->nz_allocated = irecv[1];
1839     info->nz_unneeded  = irecv[2];
1840     info->memory       = irecv[3];
1841     info->mallocs      = irecv[4];
1842   } else if (flag == MAT_GLOBAL_SUM) {
1843     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1844 
1845     info->nz_used      = irecv[0];
1846     info->nz_allocated = irecv[1];
1847     info->nz_unneeded  = irecv[2];
1848     info->memory       = irecv[3];
1849     info->mallocs      = irecv[4];
1850   }
1851   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1852   info->fill_ratio_needed = 0;
1853   info->factor_mallocs    = 0;
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSetOption_MPIAIJ"
1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1860 {
1861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   switch (op) {
1866   case MAT_NEW_NONZERO_LOCATIONS:
1867   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1868   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1869   case MAT_KEEP_NONZERO_PATTERN:
1870   case MAT_NEW_NONZERO_LOCATION_ERR:
1871   case MAT_USE_INODES:
1872   case MAT_IGNORE_ZERO_ENTRIES:
1873     MatCheckPreallocated(A,1);
1874     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1875     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1876     break;
1877   case MAT_ROW_ORIENTED:
1878     a->roworiented = flg;
1879 
1880     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1881     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1882     break;
1883   case MAT_NEW_DIAGONALS:
1884     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1885     break;
1886   case MAT_IGNORE_OFF_PROC_ENTRIES:
1887     a->donotstash = flg;
1888     break;
1889   case MAT_SPD:
1890     A->spd_set = PETSC_TRUE;
1891     A->spd     = flg;
1892     if (flg) {
1893       A->symmetric                  = PETSC_TRUE;
1894       A->structurally_symmetric     = PETSC_TRUE;
1895       A->symmetric_set              = PETSC_TRUE;
1896       A->structurally_symmetric_set = PETSC_TRUE;
1897     }
1898     break;
1899   case MAT_SYMMETRIC:
1900     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1901     break;
1902   case MAT_STRUCTURALLY_SYMMETRIC:
1903     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1904     break;
1905   case MAT_HERMITIAN:
1906     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1907     break;
1908   case MAT_SYMMETRY_ETERNAL:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   default:
1912     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
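/*
   MatGetRow_MPIAIJ() merges one row of the diagonal block A and the off-diagonal
   block B into a single row with globally increasing column indices: first the B
   entries whose global column (via garray) lies before the diagonal block, then
   the A entries shifted by cstart, then the remaining B entries.  Callers must
   pair it with MatRestoreRow(), e.g. (a minimal sketch for one locally owned row):

     PetscInt          ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/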
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatGetRow_MPIAIJ"
1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1920 {
1921   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1922   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1923   PetscErrorCode ierr;
1924   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1925   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1926   PetscInt       *cmap,*idx_p;
1927 
1928   PetscFunctionBegin;
1929   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1930   mat->getrowactive = PETSC_TRUE;
1931 
1932   if (!mat->rowvalues && (idx || v)) {
1933     /*
1934         allocate enough space to hold information from the longest row.
1935     */
1936     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1937     PetscInt   max = 1,tmp;
1938     for (i=0; i<matin->rmap->n; i++) {
1939       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1940       if (max < tmp) max = tmp;
1941     }
1942     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1943   }
1944 
1945   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1946   lrow = row - rstart;
1947 
1948   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1949   if (!v)   {pvA = 0; pvB = 0;}
1950   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1951   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1952   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1953   nztot = nzA + nzB;
1954 
1955   cmap = mat->garray;
1956   if (v  || idx) {
1957     if (nztot) {
1958       /* Sort by increasing column numbers, assuming A and B already sorted */
1959       PetscInt imark = -1;
1960       if (v) {
1961         *v = v_p = mat->rowvalues;
1962         for (i=0; i<nzB; i++) {
1963           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1964           else break;
1965         }
1966         imark = i;
1967         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1968         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1969       }
1970       if (idx) {
1971         *idx = idx_p = mat->rowindices;
1972         if (imark > -1) {
1973           for (i=0; i<imark; i++) {
1974             idx_p[i] = cmap[cworkB[i]];
1975           }
1976         } else {
1977           for (i=0; i<nzB; i++) {
1978             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1979             else break;
1980           }
1981           imark = i;
1982         }
1983         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1984         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1985       }
1986     } else {
1987       if (idx) *idx = 0;
1988       if (v)   *v   = 0;
1989     }
1990   }
1991   *nz  = nztot;
1992   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1993   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2000 {
2001   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2002 
2003   PetscFunctionBegin;
2004   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2005   aij->getrowactive = PETSC_FALSE;
2006   PetscFunctionReturn(0);
2007 }
2008 
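/*
   MatNorm_MPIAIJ() computes, in parallel,
     NORM_FROBENIUS:  sqrt(sum_ij |a_ij|^2)    (local sums, then MPI_Allreduce with MPIU_SUM)
     NORM_1:          max_j sum_i |a_ij|       (per-column sums reduced over all processes)
     NORM_INFINITY:   max_i sum_j |a_ij|       (per-row sums, then MPI_Allreduce with MPIU_MAX)
   Example (a minimal sketch):

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/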
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatNorm_MPIAIJ"
2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2012 {
2013   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2014   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2015   PetscErrorCode ierr;
2016   PetscInt       i,j,cstart = mat->cmap->rstart;
2017   PetscReal      sum = 0.0;
2018   MatScalar      *v;
2019 
2020   PetscFunctionBegin;
2021   if (aij->size == 1) {
2022     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2023   } else {
2024     if (type == NORM_FROBENIUS) {
2025       v = amat->a;
2026       for (i=0; i<amat->nz; i++) {
2027         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2028       }
2029       v = bmat->a;
2030       for (i=0; i<bmat->nz; i++) {
2031         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2032       }
2033       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2034       *norm = PetscSqrtReal(*norm);
2035     } else if (type == NORM_1) { /* max column norm */
2036       PetscReal *tmp,*tmp2;
2037       PetscInt  *jj,*garray = aij->garray;
2038       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2039       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2040       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2041       *norm = 0.0;
2042       v     = amat->a; jj = amat->j;
2043       for (j=0; j<amat->nz; j++) {
2044         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2045       }
2046       v = bmat->a; jj = bmat->j;
2047       for (j=0; j<bmat->nz; j++) {
2048         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2049       }
2050       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2051       for (j=0; j<mat->cmap->N; j++) {
2052         if (tmp2[j] > *norm) *norm = tmp2[j];
2053       }
2054       ierr = PetscFree(tmp);CHKERRQ(ierr);
2055       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2056     } else if (type == NORM_INFINITY) { /* max row norm */
2057       PetscReal ntemp = 0.0;
2058       for (j=0; j<aij->A->rmap->n; j++) {
2059         v   = amat->a + amat->i[j];
2060         sum = 0.0;
2061         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2062           sum += PetscAbsScalar(*v); v++;
2063         }
2064         v = bmat->a + bmat->i[j];
2065         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2066           sum += PetscAbsScalar(*v); v++;
2067         }
2068         if (sum > ntemp) ntemp = sum;
2069       }
2070       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2071     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075 
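/*
   MatTranspose_MPIAIJ() preallocates the transpose by counting, for every local
   column of A and B, how many entries it will contribute (the counts for ghost
   columns are returned to their owners with a PetscSF reduction), and then inserts
   each local row as a column of the result via MatSetValues() with the row and
   column arguments swapped.  Typical usage (a minimal sketch):

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/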
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatTranspose_MPIAIJ"
2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2079 {
2080   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2081   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2082   PetscErrorCode ierr;
2083   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2084   PetscInt       cstart = A->cmap->rstart,ncol;
2085   Mat            B;
2086   MatScalar      *array;
2087 
2088   PetscFunctionBegin;
2089   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2090 
2091   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2092   ai = Aloc->i; aj = Aloc->j;
2093   bi = Bloc->i; bj = Bloc->j;
2094   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2095     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2096     PetscSFNode          *oloc;
2097     PETSC_UNUSED PetscSF sf;
2098 
2099     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2100     /* compute d_nnz for preallocation */
2101     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2102     for (i=0; i<ai[ma]; i++) {
2103       d_nnz[aj[i]]++;
2104       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2105     }
2106     /* compute local off-diagonal contributions */
2107     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2108     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2109     /* map those to global */
2110     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2111     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2112     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2113     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2114     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2115     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2116     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2117 
2118     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2119     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2120     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2121     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2122     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2123     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2124   } else {
2125     B    = *matout;
2126     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2127     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2128   }
2129 
2130   /* copy over the A part */
2131   array = Aloc->a;
2132   row   = A->rmap->rstart;
2133   for (i=0; i<ma; i++) {
2134     ncol = ai[i+1]-ai[i];
2135     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2136     row++;
2137     array += ncol; aj += ncol;
2138   }
2139   aj = Aloc->j;
2140   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2141 
2142   /* copy over the B part */
2143   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2144   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2145   array = Bloc->a;
2146   row   = A->rmap->rstart;
2147   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2148   cols_tmp = cols;
2149   for (i=0; i<mb; i++) {
2150     ncol = bi[i+1]-bi[i];
2151     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2152     row++;
2153     array += ncol; cols_tmp += ncol;
2154   }
2155   ierr = PetscFree(cols);CHKERRQ(ierr);
2156 
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2160     *matout = B;
2161   } else {
2162     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
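/*
   MatDiagonalScale_MPIAIJ() forms diag(ll)*mat*diag(rr).  The scatter of rr into
   the ghost vector is started first so that the left scaling of B and the scaling
   of the diagonal block A overlap the communication; the right scaling of B is done
   only after VecScatterEnd().  Example (a minimal sketch; pass NULL to skip a side):

     ierr = MatDiagonalScale(mat,ll,rr);CHKERRQ(ierr);
*/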
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2170 {
2171   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2172   Mat            a    = aij->A,b = aij->B;
2173   PetscErrorCode ierr;
2174   PetscInt       s1,s2,s3;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2178   if (rr) {
2179     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2180     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2181     /* Overlap communication with computation. */
2182     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2183   }
2184   if (ll) {
2185     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2186     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2187     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2188   }
2189   /* scale the diagonal block */
2190   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2191 
2192   if (rr) {
2193     /* Do a scatter end and then right scale the off-diagonal block */
2194     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2195     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatEqual_MPIAIJ"
2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2215 {
2216   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2217   Mat            a,b,c,d;
2218   PetscBool      flg;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   a = matA->A; b = matA->B;
2223   c = matB->A; d = matB->B;
2224 
2225   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2226   if (flg) {
2227     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2228   }
2229   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatCopy_MPIAIJ"
2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2239   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2240 
2241   PetscFunctionBegin;
2242   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2243   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2244     /* because of the column compression in the off-processor part of the matrix a->B,
2245        the number of columns in a->B and b->B may be different, hence we cannot call
2246        MatCopy() directly on the two parts. If need be, a copy more efficient than
2247        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2248        then copying the submatrices */
2249     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2250   } else {
2251     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2252     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatSetUp_MPIAIJ"
2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
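/*
   The count below is a merge of the two sorted rows in global column numbering.
   For example, if row i of Y has global columns {1,4,7} and row i of X has {4,5},
   the union {1,4,5,7} gives nnz[i] = 4: a duplicate column is counted once, and
   any trailing Y entries are swept up by the final loop.
*/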
2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2272 {
2273   PetscInt       i,m=Y->rmap->N;
2274   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2275   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2276   const PetscInt *xi = x->i,*yi = y->i;
2277 
2278   PetscFunctionBegin;
2279   /* Set the number of nonzeros in the new matrix */
2280   for (i=0; i<m; i++) {
2281     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2282     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2283     nnz[i] = 0;
2284     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2285       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2286       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2287       nnz[i]++;
2288     }
2289     for (; k<nzy; k++) nnz[i]++;
2290   }
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPY_MPIAIJ"
2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2297 {
2298   PetscErrorCode ierr;
2299   PetscInt       i;
2300   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2301   PetscBLASInt   bnz,one=1;
2302   Mat_SeqAIJ     *x,*y;
2303 
2304   PetscFunctionBegin;
2305   if (str == SAME_NONZERO_PATTERN) {
2306     PetscScalar alpha = a;
2307     x    = (Mat_SeqAIJ*)xx->A->data;
2308     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2309     y    = (Mat_SeqAIJ*)yy->A->data;
2310     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2311     x    = (Mat_SeqAIJ*)xx->B->data;
2312     y    = (Mat_SeqAIJ*)yy->B->data;
2313     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2314     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2315   } else if (str == SUBSET_NONZERO_PATTERN) {
2316     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2317 
2318     x = (Mat_SeqAIJ*)xx->B->data;
2319     y = (Mat_SeqAIJ*)yy->B->data;
2320     if (y->xtoy && y->XtoY != xx->B) {
2321       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2322       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2323     }
2324     if (!y->xtoy) { /* get xtoy */
2325       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2326       y->XtoY = xx->B;
2327       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2328     }
2329     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2330   } else {
2331     Mat      B;
2332     PetscInt *nnz_d,*nnz_o;
2333     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2334     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2335     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2336     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2337     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2338     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2339     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2340     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2341     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2342     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2343     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2344     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2345     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2346     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2347   }
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatConjugate_MPIAIJ"
2355 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2356 {
2357 #if defined(PETSC_USE_COMPLEX)
2358   PetscErrorCode ierr;
2359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2363   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2364 #else
2365   PetscFunctionBegin;
2366 #endif
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatRealPart_MPIAIJ"
2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2373 {
2374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2379   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2386 {
2387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2392   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #if defined(PETSC_HAVE_PBGL)
2397 
2398 #include <boost/parallel/mpi/bsp_process_group.hpp>
2399 #include <boost/graph/distributed/ilu_default_graph.hpp>
2400 #include <boost/graph/distributed/ilu_0_block.hpp>
2401 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2402 #include <boost/graph/distributed/petsc/interface.hpp>
2403 #include <boost/multi_array.hpp>
2404 #include <boost/parallel/distributed_property_map.hpp>
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2408 /*
2409   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2410 */
2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2412 {
2413   namespace petsc = boost::distributed::petsc;
2414 
2415   namespace graph_dist = boost::graph::distributed;
2416   using boost::graph::distributed::ilu_default::process_group_type;
2417   using boost::graph::ilu_permuted;
2418 
2419   PetscBool      row_identity, col_identity;
2420   PetscContainer c;
2421   PetscInt       m, n, M, N;
2422   PetscErrorCode ierr;
2423 
2424   PetscFunctionBegin;
2425   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2426   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2427   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2428   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2429 
2430   process_group_type pg;
2431   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2432   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2433   lgraph_type& level_graph = *lgraph_p;
2434   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2435 
2436   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2437   ilu_permuted(level_graph);
2438 
2439   /* put together the new matrix */
2440   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2441   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2442   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2443   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2444   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2445   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2446   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448 
2449   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2450   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2451   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2452   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2459 {
2460   PetscFunctionBegin;
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSolve_MPIAIJ"
2466 /*
2467   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2468 */
2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2470 {
2471   namespace graph_dist = boost::graph::distributed;
2472 
2473   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2474   lgraph_type    *lgraph_p;
2475   PetscContainer c;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2480   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2481   ierr = VecCopy(b, x);CHKERRQ(ierr);
2482 
2483   PetscScalar *array_x;
2484   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2485   PetscInt sx;
2486   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2487 
2488   PetscScalar *array_b;
2489   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2490   PetscInt sb;
2491   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2492 
2493   lgraph_type& level_graph = *lgraph_p;
2494   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2495 
2496   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2497   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2498   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2499 
2500   typedef boost::iterator_property_map<array_ref_type::iterator,
2501                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2502   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2503   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2504 
2505   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2506   PetscFunctionReturn(0);
2507 }
2508 #endif
2509 
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatDestroy_MatRedundant"
2512 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2513 {
2514   PetscErrorCode ierr;
2515   Mat_Redundant  *redund;
2516   PetscInt       i;
2517   PetscMPIInt    size;
2518 
2519   PetscFunctionBegin;
2520   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2521   if (size == 1) {
2522     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2523     redund = a->redundant;
2524   } else {
2525     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2526     redund = a->redundant;
2527   }
2528   if (redund) {
2529     if (redund->matseq) { /* via MatGetSubMatrices()  */
2530       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2531       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2532       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2533       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2534     } else {
2535       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2536       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2537       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2538       for (i=0; i<redund->nrecvs; i++) {
2539         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2540         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2541       }
2542       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2543     }
2544 
2545     if (redund->psubcomm) {
2546       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2547     }
2548     ierr = redund->Destroy(A);CHKERRQ(ierr);
2549     ierr = PetscFree(redund);CHKERRQ(ierr);
2550   }
2551   PetscFunctionReturn(0);
2552 }
2553 
2554 #undef __FUNCT__
2555 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
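/*
   Builds, on each subcommunicator, a complete copy of mat for the INTERLACED
   subcomm layout.  Every process packs its local rows (row pointers, column
   indices and values) into sbuf_j/sbuf_a and exchanges them with the processes
   whose rows it will own in the redundant matrix, then assembles C on subcomm.
   The communication pattern and buffers are cached in a Mat_Redundant struct
   attached to C so that MAT_REUSE_MATRIX only resends the numerical values.
*/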
2556 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2557 {
2558   PetscMPIInt    rank,size;
2559   MPI_Comm       comm;
2560   PetscErrorCode ierr;
2561   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2562   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2563   PetscInt       *rowrange = mat->rmap->range;
2564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2565   Mat            A = aij->A,B=aij->B,C=*matredundant;
2566   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2567   PetscScalar    *sbuf_a;
2568   PetscInt       nzlocal=a->nz+b->nz;
2569   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2570   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2571   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2572   MatScalar      *aworkA,*aworkB;
2573   PetscScalar    *vals;
2574   PetscMPIInt    tag1,tag2,tag3,imdex;
2575   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2576   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2577   MPI_Status     recv_status,*send_status;
2578   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2579   PetscInt       **rbuf_j=NULL;
2580   PetscScalar    **rbuf_a=NULL;
2581   Mat_Redundant  *redund =NULL;
2582 
2583   PetscFunctionBegin;
2584   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2585   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2586   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2587   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2588   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2589 
2590   if (reuse == MAT_REUSE_MATRIX) {
2591     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2592     if (subsize == 1) {
2593       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2594       redund = c->redundant;
2595     } else {
2596       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2597       redund = c->redundant;
2598     }
2599     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2600 
2601     nsends    = redund->nsends;
2602     nrecvs    = redund->nrecvs;
2603     send_rank = redund->send_rank;
2604     recv_rank = redund->recv_rank;
2605     sbuf_nz   = redund->sbuf_nz;
2606     rbuf_nz   = redund->rbuf_nz;
2607     sbuf_j    = redund->sbuf_j;
2608     sbuf_a    = redund->sbuf_a;
2609     rbuf_j    = redund->rbuf_j;
2610     rbuf_a    = redund->rbuf_a;
2611   }
2612 
2613   if (reuse == MAT_INITIAL_MATRIX) {
2614     PetscInt    nleftover,np_subcomm;
2615 
2616     /* get the destination processors' id send_rank, nsends and nrecvs */
2617     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2618 
2619     np_subcomm = size/nsubcomm;
2620     nleftover  = size - nsubcomm*np_subcomm;
2621 
2622     /* block of codes below is specific for INTERLACED */
2623     /* ------------------------------------------------*/
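    /* Illustrative example (not part of the original source): with size=4 and
       nsubcomm=2 the interlaced subcomms are {0,2} and {1,3}.  In the redundant
       matrix, rank r owns the rows originally held by ranks
       (r/nsubcomm)*nsubcomm .. (r/nsubcomm)*nsubcomm + nsubcomm-1, so rank 0
       exchanges rows with rank 1 and rank 2 exchanges rows with rank 3. */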
2624     nsends = 0; nrecvs = 0;
2625     for (i=0; i<size; i++) {
2626       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2627         send_rank[nsends++] = i;
2628         recv_rank[nrecvs++] = i;
2629       }
2630     }
2631     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2632       i = size-nleftover-1;
2633       j = 0;
2634       while (j < nsubcomm - nleftover) {
2635         send_rank[nsends++] = i;
2636         i--; j++;
2637       }
2638     }
2639 
2640     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2641       for (i=0; i<nleftover; i++) {
2642         recv_rank[nrecvs++] = size-nleftover+i;
2643       }
2644     }
2645     /*----------------------------------------------*/
2646 
2647     /* allocate sbuf_j, sbuf_a */
2648     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2649     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2650     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2651     /*
2652     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2653     ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
2654      */
2655   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2656 
2657   /* copy mat's local entries into the buffers */
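  /* Buffer layout: sbuf_j holds the (rend-rstart+1) local row pointers followed by
     the column indices of every local row (off-diagonal entries left of the diagonal
     block, then the diagonal block, then the remaining off-diagonal entries, so the
     columns appear in ascending global order); sbuf_a holds the matching values. */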
2658   if (reuse == MAT_INITIAL_MATRIX) {
2659     rownz_max = 0;
2660     rptr      = sbuf_j;
2661     cols      = sbuf_j + rend-rstart + 1;
2662     vals      = sbuf_a;
2663     rptr[0]   = 0;
2664     for (i=0; i<rend-rstart; i++) {
2665       row    = i + rstart;
2666       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2667       ncols  = nzA + nzB;
2668       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2669       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2670       /* load the column indices for this row into cols */
2671       lwrite = 0;
2672       for (l=0; l<nzB; l++) {
2673         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2674           vals[lwrite]   = aworkB[l];
2675           cols[lwrite++] = ctmp;
2676         }
2677       }
2678       for (l=0; l<nzA; l++) {
2679         vals[lwrite]   = aworkA[l];
2680         cols[lwrite++] = cstart + cworkA[l];
2681       }
2682       for (l=0; l<nzB; l++) {
2683         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2684           vals[lwrite]   = aworkB[l];
2685           cols[lwrite++] = ctmp;
2686         }
2687       }
2688       vals     += ncols;
2689       cols     += ncols;
2690       rptr[i+1] = rptr[i] + ncols;
2691       if (rownz_max < ncols) rownz_max = ncols;
2692     }
2693     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2694   } else { /* only copy matrix values into sbuf_a */
2695     rptr    = sbuf_j;
2696     vals    = sbuf_a;
2697     rptr[0] = 0;
2698     for (i=0; i<rend-rstart; i++) {
2699       row    = i + rstart;
2700       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2701       ncols  = nzA + nzB;
2702       cworkB = b->j + b->i[i];
2703       aworkA = a->a + a->i[i];
2704       aworkB = b->a + b->i[i];
2705       lwrite = 0;
2706       for (l=0; l<nzB; l++) {
2707         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2708       }
2709       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2710       for (l=0; l<nzB; l++) {
2711         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2712       }
2713       vals     += ncols;
2714       rptr[i+1] = rptr[i] + ncols;
2715     }
2716   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2717 
2718   /* send nzlocal to others, and recv other's nzlocal */
2719   /*--------------------------------------------------*/
2720   if (reuse == MAT_INITIAL_MATRIX) {
2721     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2722 
2723     s_waits2 = s_waits3 + nsends;
2724     s_waits1 = s_waits2 + nsends;
2725     r_waits1 = s_waits1 + nsends;
2726     r_waits2 = r_waits1 + nrecvs;
2727     r_waits3 = r_waits2 + nrecvs;
2728   } else {
2729     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2730 
2731     r_waits3 = s_waits3 + nsends;
2732   }
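  /* All MPI_Request handles live in one allocation: s_waits3 is followed by
     s_waits2, s_waits1 and the receive requests r_waits1/2/3 for the initial
     matrix, or just by r_waits3 when only the values are being resent. */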
2733 
2734   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2735   if (reuse == MAT_INITIAL_MATRIX) {
2736     /* get new tags to keep the communication clean */
2737     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2738     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2739     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2740 
2741     /* post receives of other's nzlocal */
2742     for (i=0; i<nrecvs; i++) {
2743       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2744     }
2745     /* send nzlocal to others */
2746     for (i=0; i<nsends; i++) {
2747       sbuf_nz[i] = nzlocal;
2748       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2749     }
2750     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2751     count = nrecvs;
2752     while (count) {
2753       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2754 
2755       recv_rank[imdex] = recv_status.MPI_SOURCE;
2756       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2757       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2758 
2759       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2760 
2761       rbuf_nz[imdex] += i + 2;
2762 
2763       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2764       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2765       count--;
2766     }
2767     /* wait on sends of nzlocal */
2768     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2769     /* send mat->i,j to others, and recv from other's */
2770     /*------------------------------------------------*/
2771     for (i=0; i<nsends; i++) {
2772       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2773       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2774     }
2775     /* wait on receives of mat->i,j */
2776     /*------------------------------*/
2777     count = nrecvs;
2778     while (count) {
2779       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2780       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2781       count--;
2782     }
2783     /* wait on sends of mat->i,j */
2784     /*---------------------------*/
2785     if (nsends) {
2786       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2787     }
2788   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2789 
2790   /* post receives, send and receive mat->a */
2791   /*----------------------------------------*/
2792   for (imdex=0; imdex<nrecvs; imdex++) {
2793     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2794   }
2795   for (i=0; i<nsends; i++) {
2796     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2797   }
2798   count = nrecvs;
2799   while (count) {
2800     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2801     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2802     count--;
2803   }
2804   if (nsends) {
2805     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2806   }
2807 
2808   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2809 
2810   /* create redundant matrix */
2811   /*-------------------------*/
2812   if (reuse == MAT_INITIAL_MATRIX) {
2813     const PetscInt *range;
2814     PetscInt       rstart_sub,rend_sub,mloc_sub;
2815 
2816     /* compute rownz_max for preallocation */
2817     for (imdex=0; imdex<nrecvs; imdex++) {
2818       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2819       rptr = rbuf_j[imdex];
2820       for (i=0; i<j; i++) {
2821         ncols = rptr[i+1] - rptr[i];
2822         if (rownz_max < ncols) rownz_max = ncols;
2823       }
2824     }
2825 
2826     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2827 
2828     /* get local size of redundant matrix
2829        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2830     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2831     rstart_sub = range[nsubcomm*subrank];
2832     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2833       rend_sub = range[nsubcomm*(subrank+1)];
2834     } else {
2835       rend_sub = mat->rmap->N;
2836     }
2837     mloc_sub = rend_sub - rstart_sub;
2838 
2839     if (M == N) {
2840       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2841     } else { /* non-square matrix */
2842       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2843     }
2844     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2845     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2846     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2847     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2848   } else {
2849     C = *matredundant;
2850   }
2851 
2852   /* insert local matrix entries */
2853   rptr = sbuf_j;
2854   cols = sbuf_j + rend-rstart + 1;
2855   vals = sbuf_a;
2856   for (i=0; i<rend-rstart; i++) {
2857     row   = i + rstart;
2858     ncols = rptr[i+1] - rptr[i];
2859     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2860     vals += ncols;
2861     cols += ncols;
2862   }
2863   /* insert received matrix entries */
2864   for (imdex=0; imdex<nrecvs; imdex++) {
2865     rstart = rowrange[recv_rank[imdex]];
2866     rend   = rowrange[recv_rank[imdex]+1];
2867     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2868     rptr   = rbuf_j[imdex];
2869     cols   = rbuf_j[imdex] + rend-rstart + 1;
2870     vals   = rbuf_a[imdex];
2871     for (i=0; i<rend-rstart; i++) {
2872       row   = i + rstart;
2873       ncols = rptr[i+1] - rptr[i];
2874       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2875       vals += ncols;
2876       cols += ncols;
2877     }
2878   }
2879   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2880   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2881 
2882   if (reuse == MAT_INITIAL_MATRIX) {
2883     *matredundant = C;
2884 
2885     /* create a supporting struct and attach it to C for reuse */
2886     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2887     if (subsize == 1) {
2888       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2889       c->redundant = redund;
2890     } else {
2891       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2892       c->redundant = redund;
2893     }
2894 
2895     redund->nzlocal   = nzlocal;
2896     redund->nsends    = nsends;
2897     redund->nrecvs    = nrecvs;
2898     redund->send_rank = send_rank;
2899     redund->recv_rank = recv_rank;
2900     redund->sbuf_nz   = sbuf_nz;
2901     redund->rbuf_nz   = rbuf_nz;
2902     redund->sbuf_j    = sbuf_j;
2903     redund->sbuf_a    = sbuf_a;
2904     redund->rbuf_j    = rbuf_j;
2905     redund->rbuf_a    = rbuf_a;
2906     redund->psubcomm  = NULL;
2907 
2908     redund->Destroy = C->ops->destroy;
2909     C->ops->destroy = MatDestroy_MatRedundant;
2910   }
2911   PetscFunctionReturn(0);
2912 }
2913 
2914 #undef __FUNCT__
2915 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
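/*
   Dispatcher for redundant-matrix creation on MPIAIJ: if no subcomm is supplied,
   a PetscSubcomm is created from the options database; the INTERLACED layout is
   handled by the specialized routine above, while every other case gathers the
   needed rows with MatGetSubMatrices() and rebuilds the matrix on the
   subcommunicator with MatCreateMPIAIJConcatenateSeqAIJ().
*/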
2916 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2917 {
2918   PetscErrorCode ierr;
2919   MPI_Comm       comm;
2920   PetscMPIInt    size,subsize;
2921   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2922   Mat_Redundant  *redund=NULL;
2923   PetscSubcomm   psubcomm=NULL;
2924   MPI_Comm       subcomm_in=subcomm;
2925   Mat            *matseq;
2926   IS             isrow,iscol;
2927 
2928   PetscFunctionBegin;
2929   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2930     if (reuse ==  MAT_INITIAL_MATRIX) {
2931       /* create psubcomm, then get subcomm */
2932       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2933       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2934       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2935 
2936       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2937       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2938       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2939       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2940       subcomm = psubcomm->comm;
2941     } else { /* retrieve psubcomm and subcomm */
2942       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2943       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2944       if (subsize == 1) {
2945         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2946         redund = c->redundant;
2947       } else {
2948         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2949         redund = c->redundant;
2950       }
2951       psubcomm = redund->psubcomm;
2952     }
2953     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2954       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2955       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2956         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2957         if (subsize == 1) {
2958           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2959           c->redundant->psubcomm = psubcomm;
2960         } else {
2961           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2962           c->redundant->psubcomm = psubcomm;
2963         }
2964       }
2965       PetscFunctionReturn(0);
2966     }
2967   }
2968 
2969   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2970   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2971   if (reuse == MAT_INITIAL_MATRIX) {
2972     /* create a local sequential matrix matseq[0] */
2973     mloc_sub = PETSC_DECIDE;
2974     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2975     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2976     rstart = rend - mloc_sub;
2977     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2978     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2979   } else { /* reuse == MAT_REUSE_MATRIX */
2980     if (subsize == 1) {
2981       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2982       redund = c->redundant;
2983     } else {
2984       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2985       redund = c->redundant;
2986     }
2987 
2988     isrow  = redund->isrow;
2989     iscol  = redund->iscol;
2990     matseq = redund->matseq;
2991   }
2992   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2993   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2994 
2995   if (reuse == MAT_INITIAL_MATRIX) {
2996     /* create a supporting struct and attach it to C for reuse */
2997     ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr);
2998     if (subsize == 1) {
2999       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
3000       c->redundant = redund;
3001     } else {
3002       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
3003       c->redundant = redund;
3004     }
3005     redund->isrow    = isrow;
3006     redund->iscol    = iscol;
3007     redund->matseq   = matseq;
3008     redund->psubcomm = psubcomm;
3009     redund->Destroy               = (*matredundant)->ops->destroy;
3010     (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
3011   }
3012   PetscFunctionReturn(0);
3013 }
3014 
3015 #undef __FUNCT__
3016 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
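/*
   Computes per-row maxima in absolute value by taking the row maxima of the
   diagonal block (a->A) and of the off-diagonal block (a->B) separately and
   keeping, for each local row, the entry of larger magnitude; off-diagonal
   column indices are mapped back to global numbering through a->garray.
*/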
3017 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3018 {
3019   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3020   PetscErrorCode ierr;
3021   PetscInt       i,*idxb = 0;
3022   PetscScalar    *va,*vb;
3023   Vec            vtmp;
3024 
3025   PetscFunctionBegin;
3026   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
3027   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3028   if (idx) {
3029     for (i=0; i<A->rmap->n; i++) {
3030       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3031     }
3032   }
3033 
3034   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3035   if (idx) {
3036     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3037   }
3038   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3039   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3040 
3041   for (i=0; i<A->rmap->n; i++) {
3042     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
3043       va[i] = vb[i];
3044       if (idx) idx[i] = a->garray[idxb[i]];
3045     }
3046   }
3047 
3048   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3049   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3050   ierr = PetscFree(idxb);CHKERRQ(ierr);
3051   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3052   PetscFunctionReturn(0);
3053 }
3054 
3055 #undef __FUNCT__
3056 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
3057 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3058 {
3059   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3060   PetscErrorCode ierr;
3061   PetscInt       i,*idxb = 0;
3062   PetscScalar    *va,*vb;
3063   Vec            vtmp;
3064 
3065   PetscFunctionBegin;
3066   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
3067   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3068   if (idx) {
3069     for (i=0; i<A->rmap->n; i++) {
3070       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3071     }
3072   }
3073 
3074   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3075   if (idx) {
3076     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3077   }
3078   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3079   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3080 
3081   for (i=0; i<A->rmap->n; i++) {
3082     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
3083       va[i] = vb[i];
3084       if (idx) idx[i] = a->garray[idxb[i]];
3085     }
3086   }
3087 
3088   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3089   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3090   ierr = PetscFree(idxb);CHKERRQ(ierr);
3091   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3092   PetscFunctionReturn(0);
3093 }
3094 
3095 #undef __FUNCT__
3096 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
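/*
   Same two-block strategy as above for the row minimum: for each local row the
   winner between the diagonal-block and off-diagonal-block candidates is chosen
   by comparing magnitudes, and the returned index is either cstart+diagIdx[r]
   or cmap[offdiagIdx[r]].
*/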
3097 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3098 {
3099   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3100   PetscInt       n      = A->rmap->n;
3101   PetscInt       cstart = A->cmap->rstart;
3102   PetscInt       *cmap  = mat->garray;
3103   PetscInt       *diagIdx, *offdiagIdx;
3104   Vec            diagV, offdiagV;
3105   PetscScalar    *a, *diagA, *offdiagA;
3106   PetscInt       r;
3107   PetscErrorCode ierr;
3108 
3109   PetscFunctionBegin;
3110   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3111   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3112   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3113   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3114   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3115   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3116   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3117   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3118   for (r = 0; r < n; ++r) {
3119     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3120       a[r]   = diagA[r];
3121       idx[r] = cstart + diagIdx[r];
3122     } else {
3123       a[r]   = offdiagA[r];
3124       idx[r] = cmap[offdiagIdx[r]];
3125     }
3126   }
3127   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3128   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3129   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3130   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3131   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3132   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3133   PetscFunctionReturn(0);
3134 }
3135 
3136 #undef __FUNCT__
3137 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3138 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3139 {
3140   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3141   PetscInt       n      = A->rmap->n;
3142   PetscInt       cstart = A->cmap->rstart;
3143   PetscInt       *cmap  = mat->garray;
3144   PetscInt       *diagIdx, *offdiagIdx;
3145   Vec            diagV, offdiagV;
3146   PetscScalar    *a, *diagA, *offdiagA;
3147   PetscInt       r;
3148   PetscErrorCode ierr;
3149 
3150   PetscFunctionBegin;
3151   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3152   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3153   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3154   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3155   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3156   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3157   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3158   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3159   for (r = 0; r < n; ++r) {
3160     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3161       a[r]   = diagA[r];
3162       idx[r] = cstart + diagIdx[r];
3163     } else {
3164       a[r]   = offdiagA[r];
3165       idx[r] = cmap[offdiagIdx[r]];
3166     }
3167   }
3168   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3169   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3170   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3171   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3172   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3173   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3174   PetscFunctionReturn(0);
3175 }
3176 
3177 #undef __FUNCT__
3178 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3179 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3180 {
3181   PetscErrorCode ierr;
3182   Mat            *dummy;
3183 
3184   PetscFunctionBegin;
3185   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3186   *newmat = *dummy;
3187   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3188   PetscFunctionReturn(0);
3189 }
3190 
3191 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3192 
3193 #undef __FUNCT__
3194 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3195 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3196 {
3197   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3198   PetscErrorCode ierr;
3199 
3200   PetscFunctionBegin;
3201   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3202   PetscFunctionReturn(0);
3203 }
3204 
3205 #undef __FUNCT__
3206 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3207 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3208 {
3209   PetscErrorCode ierr;
3210   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3211 
3212   PetscFunctionBegin;
3213   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3214   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3215   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3216   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3217   PetscFunctionReturn(0);
3218 }
3219 
3220 /* -------------------------------------------------------------------*/
3221 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3222                                        MatGetRow_MPIAIJ,
3223                                        MatRestoreRow_MPIAIJ,
3224                                        MatMult_MPIAIJ,
3225                                 /* 4*/ MatMultAdd_MPIAIJ,
3226                                        MatMultTranspose_MPIAIJ,
3227                                        MatMultTransposeAdd_MPIAIJ,
3228 #if defined(PETSC_HAVE_PBGL)
3229                                        MatSolve_MPIAIJ,
3230 #else
3231                                        0,
3232 #endif
3233                                        0,
3234                                        0,
3235                                 /*10*/ 0,
3236                                        0,
3237                                        0,
3238                                        MatSOR_MPIAIJ,
3239                                        MatTranspose_MPIAIJ,
3240                                 /*15*/ MatGetInfo_MPIAIJ,
3241                                        MatEqual_MPIAIJ,
3242                                        MatGetDiagonal_MPIAIJ,
3243                                        MatDiagonalScale_MPIAIJ,
3244                                        MatNorm_MPIAIJ,
3245                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3246                                        MatAssemblyEnd_MPIAIJ,
3247                                        MatSetOption_MPIAIJ,
3248                                        MatZeroEntries_MPIAIJ,
3249                                 /*24*/ MatZeroRows_MPIAIJ,
3250                                        0,
3251 #if defined(PETSC_HAVE_PBGL)
3252                                        0,
3253 #else
3254                                        0,
3255 #endif
3256                                        0,
3257                                        0,
3258                                 /*29*/ MatSetUp_MPIAIJ,
3259 #if defined(PETSC_HAVE_PBGL)
3260                                        0,
3261 #else
3262                                        0,
3263 #endif
3264                                        0,
3265                                        0,
3266                                        0,
3267                                 /*34*/ MatDuplicate_MPIAIJ,
3268                                        0,
3269                                        0,
3270                                        0,
3271                                        0,
3272                                 /*39*/ MatAXPY_MPIAIJ,
3273                                        MatGetSubMatrices_MPIAIJ,
3274                                        MatIncreaseOverlap_MPIAIJ,
3275                                        MatGetValues_MPIAIJ,
3276                                        MatCopy_MPIAIJ,
3277                                 /*44*/ MatGetRowMax_MPIAIJ,
3278                                        MatScale_MPIAIJ,
3279                                        0,
3280                                        0,
3281                                        MatZeroRowsColumns_MPIAIJ,
3282                                 /*49*/ MatSetRandom_MPIAIJ,
3283                                        0,
3284                                        0,
3285                                        0,
3286                                        0,
3287                                 /*54*/ MatFDColoringCreate_MPIAIJ,
3288                                        0,
3289                                        MatSetUnfactored_MPIAIJ,
3290                                        MatPermute_MPIAIJ,
3291                                        0,
3292                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3293                                        MatDestroy_MPIAIJ,
3294                                        MatView_MPIAIJ,
3295                                        0,
3296                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3297                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3298                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3299                                        0,
3300                                        0,
3301                                        0,
3302                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3303                                        MatGetRowMinAbs_MPIAIJ,
3304                                        0,
3305                                        MatSetColoring_MPIAIJ,
3306                                        0,
3307                                        MatSetValuesAdifor_MPIAIJ,
3308                                 /*75*/ MatFDColoringApply_AIJ,
3309                                        0,
3310                                        0,
3311                                        0,
3312                                        MatFindZeroDiagonals_MPIAIJ,
3313                                 /*80*/ 0,
3314                                        0,
3315                                        0,
3316                                 /*83*/ MatLoad_MPIAIJ,
3317                                        0,
3318                                        0,
3319                                        0,
3320                                        0,
3321                                        0,
3322                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3323                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3324                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3325                                        MatPtAP_MPIAIJ_MPIAIJ,
3326                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3327                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3328                                        0,
3329                                        0,
3330                                        0,
3331                                        0,
3332                                 /*99*/ 0,
3333                                        0,
3334                                        0,
3335                                        MatConjugate_MPIAIJ,
3336                                        0,
3337                                 /*104*/MatSetValuesRow_MPIAIJ,
3338                                        MatRealPart_MPIAIJ,
3339                                        MatImaginaryPart_MPIAIJ,
3340                                        0,
3341                                        0,
3342                                 /*109*/0,
3343                                        MatGetRedundantMatrix_MPIAIJ,
3344                                        MatGetRowMin_MPIAIJ,
3345                                        0,
3346                                        0,
3347                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3348                                        0,
3349                                        0,
3350                                        0,
3351                                        0,
3352                                 /*119*/0,
3353                                        0,
3354                                        0,
3355                                        0,
3356                                        MatGetMultiProcBlock_MPIAIJ,
3357                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3358                                        MatGetColumnNorms_MPIAIJ,
3359                                        MatInvertBlockDiagonal_MPIAIJ,
3360                                        0,
3361                                        MatGetSubMatricesParallel_MPIAIJ,
3362                                 /*129*/0,
3363                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3364                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3365                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3366                                        0,
3367                                 /*134*/0,
3368                                        0,
3369                                        0,
3370                                        0,
3371                                        0,
3372                                 /*139*/0,
3373                                        0,
3374                                        0
3375 };
3376 
3377 /* ----------------------------------------------------------------------------------------*/
3378 
3379 #undef __FUNCT__
3380 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3381 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3382 {
3383   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3384   PetscErrorCode ierr;
3385 
3386   PetscFunctionBegin;
3387   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3388   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3389   PetscFunctionReturn(0);
3390 }
3391 
3392 #undef __FUNCT__
3393 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3394 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3395 {
3396   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3397   PetscErrorCode ierr;
3398 
3399   PetscFunctionBegin;
3400   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3401   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3402   PetscFunctionReturn(0);
3403 }
3404 
3405 #undef __FUNCT__
3406 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
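/*
   The parallel matrix is stored as two sequential AIJ matrices per process:
   b->A holds the square "diagonal" block (the columns owned by this process)
   and b->B holds the "off-diagonal" block (created with the full global column
   width here; its columns are compacted at assembly time), so the two
   preallocation calls below apply to A and B respectively.
*/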
3407 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3408 {
3409   Mat_MPIAIJ     *b;
3410   PetscErrorCode ierr;
3411 
3412   PetscFunctionBegin;
3413   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3414   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3415   b = (Mat_MPIAIJ*)B->data;
3416 
3417   if (!B->preallocated) {
3418     /* Explicitly create 2 MATSEQAIJ matrices. */
3419     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3420     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3421     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3422     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3423     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3424     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3425     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3426     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3427     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3428     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3429   }
3430 
3431   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3432   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3433   B->preallocated = PETSC_TRUE;
3434   PetscFunctionReturn(0);
3435 }
3436 
3437 #undef __FUNCT__
3438 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3439 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3440 {
3441   Mat            mat;
3442   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3443   PetscErrorCode ierr;
3444 
3445   PetscFunctionBegin;
3446   *newmat = 0;
3447   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3448   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3449   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3450   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3451   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3452   a       = (Mat_MPIAIJ*)mat->data;
3453 
3454   mat->factortype   = matin->factortype;
3455   mat->rmap->bs     = matin->rmap->bs;
3456   mat->cmap->bs     = matin->cmap->bs;
3457   mat->assembled    = PETSC_TRUE;
3458   mat->insertmode   = NOT_SET_VALUES;
3459   mat->preallocated = PETSC_TRUE;
3460 
3461   a->size         = oldmat->size;
3462   a->rank         = oldmat->rank;
3463   a->donotstash   = oldmat->donotstash;
3464   a->roworiented  = oldmat->roworiented;
3465   a->rowindices   = 0;
3466   a->rowvalues    = 0;
3467   a->getrowactive = PETSC_FALSE;
3468 
3469   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3470   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3471 
3472   if (oldmat->colmap) {
3473 #if defined(PETSC_USE_CTABLE)
3474     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3475 #else
3476     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3477     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3478     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3479 #endif
3480   } else a->colmap = 0;
3481   if (oldmat->garray) {
3482     PetscInt len;
3483     len  = oldmat->B->cmap->n;
3484     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3485     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3486     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3487   } else a->garray = 0;
3488 
3489   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3490   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3491   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3492   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3493   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3494   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3495   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3496   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3497   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3498   *newmat = mat;
3499   PetscFunctionReturn(0);
3500 }
3501 
3502 
3503 
3504 #undef __FUNCT__
3505 #define __FUNCT__ "MatLoad_MPIAIJ"
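/*
   Loads an MPIAIJ matrix from a PETSc binary viewer: rank 0 reads the 4-entry
   header (MAT_FILE_CLASSID, M, N, nz), the row lengths, the column indices and
   the values, and ships each process its slice with MPIULong_Send(); the row
   lengths are split into diagonal/off-diagonal counts so the matrix can be
   preallocated before the values are inserted.

   Typical usage (a sketch; the file name is only an example):
     PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(comm,&A);
     MatSetType(A,MATMPIAIJ);
     MatLoad(A,viewer);
     PetscViewerDestroy(&viewer);
*/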
3506 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3507 {
3508   PetscScalar    *vals,*svals;
3509   MPI_Comm       comm;
3510   PetscErrorCode ierr;
3511   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3512   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3513   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3514   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3515   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3516   int            fd;
3517   PetscInt       bs = 1;
3518 
3519   PetscFunctionBegin;
3520   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3521   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3522   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3523   if (!rank) {
3524     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3525     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3526     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3527   }
3528 
3529   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3530   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3531   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3532 
3533   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3534 
3535   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3536   M    = header[1]; N = header[2];
3537   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3538   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3539   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3540 
3541   /* If global sizes are set, check if they are consistent with that given in the file */
3542   if (sizesset) {
3543     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3544   }
3545   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3546   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3547 
3548   /* determine ownership of all (block) rows */
3549   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3550   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3551   else m = newMat->rmap->n; /* Set by user */
3552 
3553   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3554   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3555 
3556   /* First process needs enough room for process with most rows */
3557   if (!rank) {
3558     mmax = rowners[1];
3559     for (i=2; i<=size; i++) {
3560       mmax = PetscMax(mmax, rowners[i]);
3561     }
3562   } else mmax = -1;             /* unused, but compilers complain */
3563 
3564   rowners[0] = 0;
3565   for (i=2; i<=size; i++) {
3566     rowners[i] += rowners[i-1];
3567   }
3568   rstart = rowners[rank];
3569   rend   = rowners[rank+1];
3570 
3571   /* distribute row lengths to all processors */
3572   ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
3573   if (!rank) {
3574     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3575     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3576     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3577     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3578     for (j=0; j<m; j++) {
3579       procsnz[0] += ourlens[j];
3580     }
3581     for (i=1; i<size; i++) {
3582       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3583       /* calculate the number of nonzeros on each processor */
3584       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3585         procsnz[i] += rowlengths[j];
3586       }
3587       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3588     }
3589     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3590   } else {
3591     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3592   }
3593 
3594   if (!rank) {
3595     /* determine max buffer needed and allocate it */
3596     maxnz = 0;
3597     for (i=0; i<size; i++) {
3598       maxnz = PetscMax(maxnz,procsnz[i]);
3599     }
3600     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3601 
3602     /* read in my part of the matrix column indices  */
3603     nz   = procsnz[0];
3604     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3605     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3606 
3607     /* read in everyone else's part and ship it off */
3608     for (i=1; i<size; i++) {
3609       nz   = procsnz[i];
3610       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3611       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3612     }
3613     ierr = PetscFree(cols);CHKERRQ(ierr);
3614   } else {
3615     /* determine buffer space needed for message */
3616     nz = 0;
3617     for (i=0; i<m; i++) {
3618       nz += ourlens[i];
3619     }
3620     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3621 
3622     /* receive message of column indices*/
3623     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3624   }
3625 
3626   /* determine column ownership if matrix is not square */
3627   if (N != M) {
3628     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3629     else n = newMat->cmap->n;
3630     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3631     cstart = cend - n;
3632   } else {
3633     cstart = rstart;
3634     cend   = rend;
3635     n      = cend - cstart;
3636   }
3637 
3638   /* loop over local rows, determining number of off diagonal entries */
3639   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3640   jj   = 0;
3641   for (i=0; i<m; i++) {
3642     for (j=0; j<ourlens[i]; j++) {
3643       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3644       jj++;
3645     }
3646   }
3647 
3648   for (i=0; i<m; i++) {
3649     ourlens[i] -= offlens[i];
3650   }
3651   if (!sizesset) {
3652     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3653   }
3654 
3655   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3656 
3657   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3658 
3659   for (i=0; i<m; i++) {
3660     ourlens[i] += offlens[i];
3661   }
3662 
3663   if (!rank) {
3664     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3665 
3666     /* read in my part of the matrix numerical values  */
3667     nz   = procsnz[0];
3668     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3669 
3670     /* insert into matrix */
3671     jj      = rstart;
3672     smycols = mycols;
3673     svals   = vals;
3674     for (i=0; i<m; i++) {
3675       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3676       smycols += ourlens[i];
3677       svals   += ourlens[i];
3678       jj++;
3679     }
3680 
3681     /* read in other processors and ship out */
3682     for (i=1; i<size; i++) {
3683       nz   = procsnz[i];
3684       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3685       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3686     }
3687     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3688   } else {
3689     /* receive numeric values */
3690     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3691 
3692     /* receive message of values*/
3693     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3694 
3695     /* insert into matrix */
3696     jj      = rstart;
3697     smycols = mycols;
3698     svals   = vals;
3699     for (i=0; i<m; i++) {
3700       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3701       smycols += ourlens[i];
3702       svals   += ourlens[i];
3703       jj++;
3704     }
3705   }
3706   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3707   ierr = PetscFree(vals);CHKERRQ(ierr);
3708   ierr = PetscFree(mycols);CHKERRQ(ierr);
3709   ierr = PetscFree(rowners);CHKERRQ(ierr);
3710   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3711   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3712   PetscFunctionReturn(0);
3713 }
3714 
3715 #undef __FUNCT__
3716 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
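/*
   Parallel submatrix extraction: the (possibly distributed) column index set is
   gathered onto every process with ISAllGather(), the actual work is done by
   MatGetSubMatrix_MPIAIJ_Private(), and the gathered IS is composed with the
   new matrix under the key "ISAllGather" so MAT_REUSE_MATRIX can find it again.
*/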
3717 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3718 {
3719   PetscErrorCode ierr;
3720   IS             iscol_local;
3721   PetscInt       csize;
3722 
3723   PetscFunctionBegin;
3724   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3725   if (call == MAT_REUSE_MATRIX) {
3726     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3727     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3728   } else {
3729     PetscInt cbs;
3730     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3731     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3732     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3733   }
3734   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3735   if (call == MAT_INITIAL_MATRIX) {
3736     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3737     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3738   }
3739   PetscFunctionReturn(0);
3740 }
3741 
3742 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3743 #undef __FUNCT__
3744 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3745 /*
3746     Not great since it makes two copies of the submatrix: first a local SeqAIJ,
3747   and then the final result obtained by concatenating the local matrices.
3748   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3749 
3750   Note: This requires a sequential iscol with all indices.
3751 */
3752 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3753 {
3754   PetscErrorCode ierr;
3755   PetscMPIInt    rank,size;
3756   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3757   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3758   PetscBool      allcolumns, colflag;
3759   Mat            M,Mreuse;
3760   MatScalar      *vwork,*aa;
3761   MPI_Comm       comm;
3762   Mat_SeqAIJ     *aij;
3763 
3764   PetscFunctionBegin;
3765   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3766   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3767   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3768 
3769   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3770   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3771   if (colflag && ncol == mat->cmap->N) {
3772     allcolumns = PETSC_TRUE;
3773   } else {
3774     allcolumns = PETSC_FALSE;
3775   }
3776   if (call ==  MAT_REUSE_MATRIX) {
3777     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3778     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3779     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3780   } else {
3781     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3782   }
3783 
3784   /*
3785       m - number of local rows
3786       n - number of columns (same on all processors)
3787       rstart - first row in new global matrix generated
3788   */
3789   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3790   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3791   if (call == MAT_INITIAL_MATRIX) {
3792     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3793     ii  = aij->i;
3794     jj  = aij->j;
3795 
3796     /*
3797         Determine the number of non-zeros in the diagonal and off-diagonal
3798         portions of the matrix in order to do correct preallocation
3799     */
3800 
3801     /* first get start and end of "diagonal" columns */
3802     if (csize == PETSC_DECIDE) {
3803       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3804       if (mglobal == n) { /* square matrix */
3805         nlocal = m;
3806       } else {
3807         nlocal = n/size + ((n % size) > rank);
3808       }
3809     } else {
3810       nlocal = csize;
3811     }
3812     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3813     rstart = rend - nlocal;
3814     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3815 
3816     /* next, compute all the lengths */
3817     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3818     olens = dlens + m;
3819     for (i=0; i<m; i++) {
3820       jend = ii[i+1] - ii[i];
3821       olen = 0;
3822       dlen = 0;
3823       for (j=0; j<jend; j++) {
3824         if (*jj < rstart || *jj >= rend) olen++;
3825         else dlen++;
3826         jj++;
3827       }
3828       olens[i] = olen;
3829       dlens[i] = dlen;
3830     }
3831     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3832     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3833     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3834     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3835     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3836     ierr = PetscFree(dlens);CHKERRQ(ierr);
3837   } else {
3838     PetscInt ml,nl;
3839 
3840     M    = *newmat;
3841     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3842     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3843     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3844     /*
3845          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3846        rather than the slower MatSetValues().
3847     */
3848     M->was_assembled = PETSC_TRUE;
3849     M->assembled     = PETSC_FALSE;
3850   }
3851   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3852   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3853   ii   = aij->i;
3854   jj   = aij->j;
3855   aa   = aij->a;
3856   for (i=0; i<m; i++) {
3857     row   = rstart + i;
3858     nz    = ii[i+1] - ii[i];
3859     cwork = jj;     jj += nz;
3860     vwork = aa;     aa += nz;
3861     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3862   }
3863 
3864   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3865   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3866   *newmat = M;
3867 
3868   /* save submatrix used in processor for next request */
3869   if (call ==  MAT_INITIAL_MATRIX) {
3870     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3871     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3872   }
3873   PetscFunctionReturn(0);
3874 }
3875 
3876 #undef __FUNCT__
3877 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3878 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3879 {
3880   PetscInt       m,cstart, cend,j,nnz,i,d;
3881   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3882   const PetscInt *JJ;
3883   PetscScalar    *values;
3884   PetscErrorCode ierr;
3885 
3886   PetscFunctionBegin;
3887   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3888 
3889   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3890   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3891   m      = B->rmap->n;
3892   cstart = B->cmap->rstart;
3893   cend   = B->cmap->rend;
3894   rstart = B->rmap->rstart;
3895 
3896   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3897 
3898 #if defined(PETSC_USE_DEBUG)
3899   for (i=0; i<m; i++) {
3900     nnz = Ii[i+1]- Ii[i];
3901     JJ  = J + Ii[i];
3902     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3903     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3904     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3905   }
3906 #endif
3907 
3908   for (i=0; i<m; i++) {
3909     nnz     = Ii[i+1]- Ii[i];
3910     JJ      = J + Ii[i];
3911     nnz_max = PetscMax(nnz_max,nnz);
3912     d       = 0;
3913     for (j=0; j<nnz; j++) {
3914       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3915     }
3916     d_nnz[i] = d;
3917     o_nnz[i] = nnz - d;
3918   }
3919   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3920   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3921 
3922   if (v) values = (PetscScalar*)v;
3923   else {
3924     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3925     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3926   }
3927 
3928   for (i=0; i<m; i++) {
3929     ii   = i + rstart;
3930     nnz  = Ii[i+1]- Ii[i];
3931     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3932   }
3933   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3934   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3935 
3936   if (!v) {
3937     ierr = PetscFree(values);CHKERRQ(ierr);
3938   }
3939   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3940   PetscFunctionReturn(0);
3941 }
3942 
3943 #undef __FUNCT__
3944 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3945 /*@
3946    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3947    (the default parallel PETSc format).
3948 
3949    Collective on MPI_Comm
3950 
3951    Input Parameters:
3952 +  B - the matrix
3953 .  i - the indices into j for the start of each local row (starts with zero)
3954 .  j - the column indices for each local row (starts with zero)
3955 -  v - optional values in the matrix
3956 
3957    Level: developer
3958 
3959    Notes:
3960        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3961      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3962      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3963 
3964        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3965 
3966        The format used for the sparse matrix input is equivalent to a
3967     row-major ordering, i.e. for the following matrix, the input data expected is
3968     as shown:
3969 
3970         1 0 0
3971         2 0 3     P0
3972        -------
3973         4 5 6     P1
3974 
3975      Process0 [P0]: rows_owned=[0,1]
3976         i =  {0,1,3}  [size = nrow+1  = 2+1]
3977         j =  {0,0,2}  [size = nz = 3]
3978         v =  {1,2,3}  [size = nz = 3]
3979 
3980      Process1 [P1]: rows_owned=[2]
3981         i =  {0,3}    [size = nrow+1  = 1+1]
3982         j =  {0,1,2}  [size = nz = 3]
3983         v =  {4,5,6}  [size = nz = 3]
3984 
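     A minimal calling sketch for process P0 in the example above (illustrative only;
     error checking is omitted and the sizes are assumed to match the layout shown):

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
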
3985 .keywords: matrix, aij, compressed row, sparse, parallel
3986 
3987 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3988           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3989 @*/
3990 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3991 {
3992   PetscErrorCode ierr;
3993 
3994   PetscFunctionBegin;
3995   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3996   PetscFunctionReturn(0);
3997 }
3998 
3999 #undef __FUNCT__
4000 #define __FUNCT__ "MatMPIAIJSetPreallocation"
4001 /*@C
4002    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4003    (the default parallel PETSc format).  For good matrix assembly performance
4004    the user should preallocate the matrix storage by setting the parameters
4005    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4006    performance can be increased by more than a factor of 50.
4007 
4008    Collective on MPI_Comm
4009 
4010    Input Parameters:
4011 +  A - the matrix
4012 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4013            (same value is used for all local rows)
4014 .  d_nnz - array containing the number of nonzeros in the various rows of the
4015            DIAGONAL portion of the local submatrix (possibly different for each row)
4016            or NULL, if d_nz is used to specify the nonzero structure.
4017            The size of this array is equal to the number of local rows, i.e 'm'.
4018            For matrices that will be factored, you must leave room for (and set)
4019            the diagonal entry even if it is zero.
4020 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4021            submatrix (same value is used for all local rows).
4022 -  o_nnz - array containing the number of nonzeros in the various rows of the
4023            OFF-DIAGONAL portion of the local submatrix (possibly different for
4024            each row) or NULL, if o_nz is used to specify the nonzero
4025            structure. The size of this array is equal to the number
4026            of local rows, i.e 'm'.
4027 
4028    If the *_nnz parameter is given then the *_nz parameter is ignored
4029 
4030    The AIJ format (also called the Yale sparse matrix format or
4031    compressed row storage (CSR)) is fully compatible with standard Fortran 77
4032    storage.  The stored row and column indices begin with zero.
4033    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
4034 
4035    The parallel matrix is partitioned such that the first m0 rows belong to
4036    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4037    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4038 
4039    The DIAGONAL portion of the local submatrix of a processor can be defined
4040    as the submatrix which is obtained by extracting the part corresponding to
4041    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4042    first row that belongs to the processor, r2 is the last row belonging to
4043    this processor, and c1-c2 is the range of indices of the local part of a
4044    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4045    common case of a square matrix, the row and column ranges are the same and
4046    the DIAGONAL part is also square. The remaining portion of the local
4047    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4048 
4049    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4050 
4051    You can call MatGetInfo() to get information on how effective the preallocation was;
4052    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4053    You can also run with the option -info and look for messages with the string
4054    malloc in them to see if additional memory allocation was needed.
4055 
4056    Example usage:
4057 
4058    Consider the following 8x8 matrix with 34 non-zero values that is
4059    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4060    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4061    as follows:
4062 
4063 .vb
4064             1  2  0  |  0  3  0  |  0  4
4065     Proc0   0  5  6  |  7  0  0  |  8  0
4066             9  0 10  | 11  0  0  | 12  0
4067     -------------------------------------
4068            13  0 14  | 15 16 17  |  0  0
4069     Proc1   0 18  0  | 19 20 21  |  0  0
4070             0  0  0  | 22 23  0  | 24  0
4071     -------------------------------------
4072     Proc2  25 26 27  |  0  0 28  | 29  0
4073            30  0  0  | 31 32 33  |  0 34
4074 .ve
4075 
4076    This can be represented as a collection of submatrices as:
4077 
4078 .vb
4079       A B C
4080       D E F
4081       G H I
4082 .ve
4083 
4084    Where the submatrices A,B,C are owned by proc0, D,E,F are
4085    owned by proc1, G,H,I are owned by proc2.
4086 
4087    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4088    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4089    The 'M','N' parameters are 8,8, and have the same values on all procs.
4090 
4091    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4092    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4093    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4094    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4095    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
4096    matrix and [DF] as another SeqAIJ matrix.
4097 
4098    When d_nz, o_nz parameters are specified, d_nz storage elements are
4099    allocated for every row of the local diagonal submatrix, and o_nz
4100    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4101    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
4102    over the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4103    In this case, the values of d_nz,o_nz are:
4104 .vb
4105      proc0 : dnz = 2, o_nz = 2
4106      proc1 : dnz = 3, o_nz = 2
4107      proc2 : dnz = 1, o_nz = 4
4108 .ve
4109    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4110    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4111    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4112    34 values.
4113 
4114    When d_nnz, o_nnz parameters are specified, the storage is specified
4115    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4116    In the above case the values for d_nnz,o_nnz are:
4117 .vb
4118      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4119      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4120      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4121 .ve
4122    Here the space allocated is the sum of all the above values, i.e. 34, and
4123    hence pre-allocation is perfect.
4124 
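   A minimal sketch of the corresponding call on proc0 for the example above
   (illustrative; the matrix A is assumed to have already been created with
   MatCreate(), MatSetSizes() and MatSetType()):

.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
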
4125    Level: intermediate
4126 
4127 .keywords: matrix, aij, compressed row, sparse, parallel
4128 
4129 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4130           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4131 @*/
4132 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4133 {
4134   PetscErrorCode ierr;
4135 
4136   PetscFunctionBegin;
4137   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4138   PetscValidType(B,1);
4139   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4140   PetscFunctionReturn(0);
4141 }
4142 
4143 #undef __FUNCT__
4144 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4145 /*@
4146      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4147          CSR format the local rows.
4148 
4149    Collective on MPI_Comm
4150 
4151    Input Parameters:
4152 +  comm - MPI communicator
4153 .  m - number of local rows (Cannot be PETSC_DECIDE)
4154 .  n - This value should be the same as the local size used in creating the
4155        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4156        calculated if N is given) For square matrices n is almost always m.
4157 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4158 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4159 .   i - row indices
4160 .   j - column indices
4161 -   a - matrix values
4162 
4163    Output Parameter:
4164 .   mat - the matrix
4165 
4166    Level: intermediate
4167 
4168    Notes:
4169        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4170      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4171      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4172 
4173        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4174 
4175        The format used for the sparse matrix input is equivalent to a
4176     row-major ordering, i.e. for the following matrix, the input data expected is
4177     as shown:
4178 
4179         1 0 0
4180         2 0 3     P0
4181        -------
4182         4 5 6     P1
4183 
4184      Process0 [P0]: rows_owned=[0,1]
4185         i =  {0,1,3}  [size = nrow+1  = 2+1]
4186         j =  {0,0,2}  [size = nz = 3]
4187         v =  {1,2,3}  [size = nz = 3]
4188 
4189      Process1 [P1]: rows_owned=[2]
4190         i =  {0,3}    [size = nrow+1  = 1+1]
4191         j =  {0,1,2}  [size = nz = 3]
4192         v =  {4,5,6}  [size = nz = 3]
4193 
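     The corresponding call on P0 for the example above might look like the following
     sketch (error checking omitted; the sizes are assumed to match the layout shown):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);
.ve
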
4194 .keywords: matrix, aij, compressed row, sparse, parallel
4195 
4196 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4197           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4198 @*/
4199 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4200 {
4201   PetscErrorCode ierr;
4202 
4203   PetscFunctionBegin;
4204   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4205   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4206   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4207   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4208   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4209   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4210   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4211   PetscFunctionReturn(0);
4212 }
4213 
4214 #undef __FUNCT__
4215 #define __FUNCT__ "MatCreateAIJ"
4216 /*@C
4217    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4218    (the default parallel PETSc format).  For good matrix assembly performance
4219    the user should preallocate the matrix storage by setting the parameters
4220    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4221    performance can be increased by more than a factor of 50.
4222 
4223    Collective on MPI_Comm
4224 
4225    Input Parameters:
4226 +  comm - MPI communicator
4227 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4228            This value should be the same as the local size used in creating the
4229            y vector for the matrix-vector product y = Ax.
4230 .  n - This value should be the same as the local size used in creating the
4231        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4232        it calculated if N is given). For square matrices n is almost always m.
4233 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4234 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4235 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4236            (same value is used for all local rows)
4237 .  d_nnz - array containing the number of nonzeros in the various rows of the
4238            DIAGONAL portion of the local submatrix (possibly different for each row)
4239            or NULL, if d_nz is used to specify the nonzero structure.
4240            The size of this array is equal to the number of local rows, i.e 'm'.
4241 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4242            submatrix (same value is used for all local rows).
4243 -  o_nnz - array containing the number of nonzeros in the various rows of the
4244            OFF-DIAGONAL portion of the local submatrix (possibly different for
4245            each row) or NULL, if o_nz is used to specify the nonzero
4246            structure. The size of this array is equal to the number
4247            of local rows, i.e 'm'.
4248 
4249    Output Parameter:
4250 .  A - the matrix
4251 
4252    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4253    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4254    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4255 
4256    Notes:
4257    If the *_nnz parameter is given then the *_nz parameter is ignored
4258 
4259    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4260    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4261    storage requirements for this matrix.
4262 
4263    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4264    processor then it must be used on all processors that share the object for
4265    that argument.
4266 
4267    The user MUST specify either the local or global matrix dimensions
4268    (possibly both).
4269 
4270    The parallel matrix is partitioned across processors such that the
4271    first m0 rows belong to process 0, the next m1 rows belong to
4272    process 1, the next m2 rows belong to process 2 etc., where
4273    m0,m1,m2,... are given by the input parameter 'm', i.e. each processor stores
4274    values corresponding to an [m x N] submatrix.
4275 
4276    The columns are logically partitioned with the n0 columns belonging
4277    to the 0th partition, the next n1 columns belonging to the next
4278    partition etc., where n0,n1,n2... are given by the input parameter 'n'.
4279 
4280    The DIAGONAL portion of the local submatrix on any given processor
4281    is the submatrix formed from the m rows and n columns owned by
4282    that processor, i.e. the diagonal matrix on
4283    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4284    etc. The remaining portion of the local submatrix [m x (N-n)]
4285    constitutes the OFF-DIAGONAL portion. The example below better
4286    illustrates this concept.
4287 
4288    For a square global matrix we define each processor's diagonal portion
4289    to be its local rows and the corresponding columns (a square submatrix);
4290    each processor's off-diagonal portion encompasses the remainder of the
4291    local matrix (a rectangular submatrix).
4292 
4293    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4294 
4295    When calling this routine with a single process communicator, a matrix of
4296    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4297    type of communicator, use the construction mechanism:
4298      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4299 
4300    By default, this format uses inodes (identical nodes) when possible.
4301    We search for consecutive rows with the same nonzero structure, thereby
4302    reusing matrix information to achieve increased efficiency.
4303 
4304    Options Database Keys:
4305 +  -mat_no_inode  - Do not use inodes
4306 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4307 -  -mat_aij_oneindex - Internally use indexing starting at 1
4308         rather than 0.  Note that when calling MatSetValues(),
4309         the user still MUST index entries starting at 0!
4310 
4311 
4312    Example usage:
4313 
4314    Consider the following 8x8 matrix with 34 non-zero values that is
4315    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4316    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4317    as follows:
4318 
4319 .vb
4320             1  2  0  |  0  3  0  |  0  4
4321     Proc0   0  5  6  |  7  0  0  |  8  0
4322             9  0 10  | 11  0  0  | 12  0
4323     -------------------------------------
4324            13  0 14  | 15 16 17  |  0  0
4325     Proc1   0 18  0  | 19 20 21  |  0  0
4326             0  0  0  | 22 23  0  | 24  0
4327     -------------------------------------
4328     Proc2  25 26 27  |  0  0 28  | 29  0
4329            30  0  0  | 31 32 33  |  0 34
4330 .ve
4331 
4332    This can be represented as a collection of submatrices as:
4333 
4334 .vb
4335       A B C
4336       D E F
4337       G H I
4338 .ve
4339 
4340    Where the submatrices A,B,C are owned by proc0, D,E,F are
4341    owned by proc1, G,H,I are owned by proc2.
4342 
4343    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4344    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4345    The 'M','N' parameters are 8,8, and have the same values on all procs.
4346 
4347    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4348    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4349    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4350    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4351    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
4352    matrix and [DF] as another SeqAIJ matrix.
4353 
4354    When d_nz, o_nz parameters are specified, d_nz storage elements are
4355    allocated for every row of the local diagonal submatrix, and o_nz
4356    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4357    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
4358    over the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4359    In this case, the values of d_nz,o_nz are:
4360 .vb
4361      proc0 : dnz = 2, o_nz = 2
4362      proc1 : dnz = 3, o_nz = 2
4363      proc2 : dnz = 1, o_nz = 4
4364 .ve
4365    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4366    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4367    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4368    34 values.
4369 
4370    When d_nnz, o_nnz parameters are specified, the storage is specified
4371    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4372    In the above case the values for d_nnz,o_nnz are:
4373 .vb
4374      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4375      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4376      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4377 .ve
4378    Here the space allocated is the sum of all the above values, i.e. 34, and
4379    hence pre-allocation is perfect.
4380 
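   A minimal sketch of the corresponding call for the example above, executed on every
   process (illustrative; d_nnz and o_nnz hold the per-process values listed above, and
   m equals n here, i.e. 3, 3 or 2 depending on the process):

.vb
     Mat A;

     MatCreateAIJ(PETSC_COMM_WORLD,m,m,8,8,0,d_nnz,0,o_nnz,&A);
.ve
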
4381    Level: intermediate
4382 
4383 .keywords: matrix, aij, compressed row, sparse, parallel
4384 
4385 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4386           MPIAIJ, MatCreateMPIAIJWithArrays()
4387 @*/
4388 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4389 {
4390   PetscErrorCode ierr;
4391   PetscMPIInt    size;
4392 
4393   PetscFunctionBegin;
4394   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4395   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4396   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4397   if (size > 1) {
4398     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4399     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4400   } else {
4401     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4402     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4403   }
4404   PetscFunctionReturn(0);
4405 }
4406 
4407 #undef __FUNCT__
4408 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4409 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4410 {
4411   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4412 
4413   PetscFunctionBegin;
4414   *Ad     = a->A;
4415   *Ao     = a->B;
4416   *colmap = a->garray;
4417   PetscFunctionReturn(0);
4418 }
4419 
4420 #undef __FUNCT__
4421 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4422 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4423 {
4424   PetscErrorCode ierr;
4425   PetscInt       i;
4426   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4427 
4428   PetscFunctionBegin;
4429   if (coloring->ctype == IS_COLORING_GLOBAL) {
4430     ISColoringValue *allcolors,*colors;
4431     ISColoring      ocoloring;
4432 
4433     /* set coloring for diagonal portion */
4434     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4435 
4436     /* set coloring for off-diagonal portion */
4437     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4438     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4439     for (i=0; i<a->B->cmap->n; i++) {
4440       colors[i] = allcolors[a->garray[i]];
4441     }
4442     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4443     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4444     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4445     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4446   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4447     ISColoringValue *colors;
4448     PetscInt        *larray;
4449     ISColoring      ocoloring;
4450 
4451     /* set coloring for diagonal portion */
4452     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4453     for (i=0; i<a->A->cmap->n; i++) {
4454       larray[i] = i + A->cmap->rstart;
4455     }
4456     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4457     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4458     for (i=0; i<a->A->cmap->n; i++) {
4459       colors[i] = coloring->colors[larray[i]];
4460     }
4461     ierr = PetscFree(larray);CHKERRQ(ierr);
4462     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4463     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4464     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4465 
4466     /* set coloring for off-diagonal portion */
4467     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4468     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4469     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4470     for (i=0; i<a->B->cmap->n; i++) {
4471       colors[i] = coloring->colors[larray[i]];
4472     }
4473     ierr = PetscFree(larray);CHKERRQ(ierr);
4474     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4475     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4476     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4477   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4478   PetscFunctionReturn(0);
4479 }
4480 
4481 #undef __FUNCT__
4482 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4483 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4484 {
4485   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4486   PetscErrorCode ierr;
4487 
4488   PetscFunctionBegin;
4489   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4490   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4491   PetscFunctionReturn(0);
4492 }
4493 
4494 #undef __FUNCT__
4495 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4496 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4497 {
4498   PetscErrorCode ierr;
4499   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4500   PetscInt       *indx;
4501 
4502   PetscFunctionBegin;
4503   /* This routine will ONLY return MPIAIJ type matrix */
4504   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4505   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4506   if (n == PETSC_DECIDE) {
4507     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4508   }
4509   /* Check sum(n) = N */
4510   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4511   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
4512 
4513   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4514   rstart -= m;
4515 
4516   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4517   for (i=0; i<m; i++) {
4518     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4519     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4520     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4521   }
4522 
4523   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4524   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4525   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4526   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4527   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4528   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4529   PetscFunctionReturn(0);
4530 }
4531 
4532 #undef __FUNCT__
4533 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4534 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4535 {
4536   PetscErrorCode ierr;
4537   PetscInt       m,N,i,rstart,nnz,Ii;
4538   PetscInt       *indx;
4539   PetscScalar    *values;
4540 
4541   PetscFunctionBegin;
4542   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4543   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4544   for (i=0; i<m; i++) {
4545     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4546     Ii   = i + rstart;
4547     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4548     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4549   }
4550   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4551   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4552   PetscFunctionReturn(0);
4553 }
4554 
4555 #undef __FUNCT__
4556 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4557 /*@
4558       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4559                  matrices from each processor
4560 
4561     Collective on MPI_Comm
4562 
4563    Input Parameters:
4564 +    comm - the communicator the parallel matrix will live on
4565 .    inmat - the input sequential matrix (one per process)
4566 .    n - number of local columns (or PETSC_DECIDE)
4567 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4568 
4569    Output Parameter:
4570 .    outmat - the parallel matrix generated
4571 
4572     Level: advanced
4573 
4574    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4575 
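   A typical call might look like the following sketch (inmat is assumed to be an
   assembled SEQAIJ matrix on each process):

.vb
     Mat C;

     MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve
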
4576 @*/
4577 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4578 {
4579   PetscErrorCode ierr;
4580   PetscMPIInt    size;
4581 
4582   PetscFunctionBegin;
4583   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4584   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4585   if (size == 1) {
4586     if (scall == MAT_INITIAL_MATRIX) {
4587       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4588     } else {
4589       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4590     }
4591   } else {
4592     if (scall == MAT_INITIAL_MATRIX) {
4593       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4594     }
4595     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4596   }
4597   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4598   PetscFunctionReturn(0);
4599 }
4600 
4601 #undef __FUNCT__
4602 #define __FUNCT__ "MatFileSplit"
4603 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4604 {
4605   PetscErrorCode    ierr;
4606   PetscMPIInt       rank;
4607   PetscInt          m,N,i,rstart,nnz;
4608   size_t            len;
4609   const PetscInt    *indx;
4610   PetscViewer       out;
4611   char              *name;
4612   Mat               B;
4613   const PetscScalar *values;
4614 
4615   PetscFunctionBegin;
4616   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4617   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4618   /* Should this be the type of the diagonal block of A? */
4619   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4620   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4621   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4622   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4623   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4624   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4625   for (i=0; i<m; i++) {
4626     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4627     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4628     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4629   }
4630   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4631   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4632 
4633   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4634   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4635   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4636   sprintf(name,"%s.%d",outfile,rank);
4637   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4638   ierr = PetscFree(name);CHKERRQ(ierr);
4639   ierr = MatView(B,out);CHKERRQ(ierr);
4640   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4641   ierr = MatDestroy(&B);CHKERRQ(ierr);
4642   PetscFunctionReturn(0);
4643 }
4644 
4645 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4646 #undef __FUNCT__
4647 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4648 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4649 {
4650   PetscErrorCode      ierr;
4651   Mat_Merge_SeqsToMPI *merge;
4652   PetscContainer      container;
4653 
4654   PetscFunctionBegin;
4655   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4656   if (container) {
4657     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4658     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4659     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4660     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4661     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4662     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4663     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4664     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4665     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4666     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4667     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4668     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4669     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4670     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4671     ierr = PetscFree(merge);CHKERRQ(ierr);
4672     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4673   }
4674   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4675   PetscFunctionReturn(0);
4676 }
4677 
4678 #include <../src/mat/utils/freespace.h>
4679 #include <petscbt.h>
4680 
4681 #undef __FUNCT__
4682 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4683 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4684 {
4685   PetscErrorCode      ierr;
4686   MPI_Comm            comm;
4687   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4688   PetscMPIInt         size,rank,taga,*len_s;
4689   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4690   PetscInt            proc,m;
4691   PetscInt            **buf_ri,**buf_rj;
4692   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4693   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4694   MPI_Request         *s_waits,*r_waits;
4695   MPI_Status          *status;
4696   MatScalar           *aa=a->a;
4697   MatScalar           **abuf_r,*ba_i;
4698   Mat_Merge_SeqsToMPI *merge;
4699   PetscContainer      container;
4700 
4701   PetscFunctionBegin;
4702   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4703   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4704 
4705   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4706   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4707 
4708   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4709   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4710 
4711   bi     = merge->bi;
4712   bj     = merge->bj;
4713   buf_ri = merge->buf_ri;
4714   buf_rj = merge->buf_rj;
4715 
4716   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4717   owners = merge->rowmap->range;
4718   len_s  = merge->len_s;
4719 
4720   /* send and recv matrix values */
4721   /*-----------------------------*/
4722   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4723   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4724 
4725   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4726   for (proc=0,k=0; proc<size; proc++) {
4727     if (!len_s[proc]) continue;
4728     i    = owners[proc];
4729     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4730     k++;
4731   }
4732 
4733   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4734   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4735   ierr = PetscFree(status);CHKERRQ(ierr);
4736 
4737   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4738   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4739 
4740   /* insert mat values of mpimat */
4741   /*----------------------------*/
4742   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4743   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4744 
4745   for (k=0; k<merge->nrecv; k++) {
4746     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4747     nrows       = *(buf_ri_k[k]);
4748     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4749     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure  */
4750   }
4751 
4752   /* set values of ba */
4753   m = merge->rowmap->n;
4754   for (i=0; i<m; i++) {
4755     arow = owners[rank] + i;
4756     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4757     bnzi = bi[i+1] - bi[i];
4758     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4759 
4760     /* add local non-zero vals of this proc's seqmat into ba */
4761     anzi   = ai[arow+1] - ai[arow];
4762     aj     = a->j + ai[arow];
4763     aa     = a->a + ai[arow];
4764     nextaj = 0;
4765     for (j=0; nextaj<anzi; j++) {
4766       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4767         ba_i[j] += aa[nextaj++];
4768       }
4769     }
4770 
4771     /* add received vals into ba */
4772     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4773       /* i-th row */
4774       if (i == *nextrow[k]) {
4775         anzi   = *(nextai[k]+1) - *nextai[k];
4776         aj     = buf_rj[k] + *(nextai[k]);
4777         aa     = abuf_r[k] + *(nextai[k]);
4778         nextaj = 0;
4779         for (j=0; nextaj<anzi; j++) {
4780           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4781             ba_i[j] += aa[nextaj++];
4782           }
4783         }
4784         nextrow[k]++; nextai[k]++;
4785       }
4786     }
4787     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4788   }
4789   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4790   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4791 
4792   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4793   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4794   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4795   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4796   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4797   PetscFunctionReturn(0);
4798 }
4799 
4800 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4801 
4802 #undef __FUNCT__
4803 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4804 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4805 {
4806   PetscErrorCode      ierr;
4807   Mat                 B_mpi;
4808   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4809   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4810   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4811   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4812   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4813   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4814   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4815   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4816   MPI_Status          *status;
4817   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4818   PetscBT             lnkbt;
4819   Mat_Merge_SeqsToMPI *merge;
4820   PetscContainer      container;
4821 
4822   PetscFunctionBegin;
4823   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4824 
4825   /* make sure it is a PETSc comm */
4826   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4827   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4828   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4829 
4830   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4831   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4832 
4833   /* determine row ownership */
4834   /*---------------------------------------------------------*/
4835   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4836   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4837   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4838   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4839   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4840   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4841   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4842 
4843   m      = merge->rowmap->n;
4844   owners = merge->rowmap->range;
4845 
4846   /* determine the number of messages to send, their lengths */
4847   /*---------------------------------------------------------*/
4848   len_s = merge->len_s;
4849 
4850   len          = 0; /* length of buf_si[] */
4851   merge->nsend = 0;
4852   for (proc=0; proc<size; proc++) {
4853     len_si[proc] = 0;
4854     if (proc == rank) {
4855       len_s[proc] = 0;
4856     } else {
4857       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4858       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4859     }
4860     if (len_s[proc]) {
4861       merge->nsend++;
4862       nrows = 0;
4863       for (i=owners[proc]; i<owners[proc+1]; i++) {
4864         if (ai[i+1] > ai[i]) nrows++;
4865       }
4866       len_si[proc] = 2*(nrows+1);
4867       len         += len_si[proc];
4868     }
4869   }
4870 
4871   /* determine the number and length of messages to receive for ij-structure */
4872   /*-------------------------------------------------------------------------*/
4873   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4874   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4875 
4876   /* post the Irecv of j-structure */
4877   /*-------------------------------*/
4878   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4879   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4880 
4881   /* post the Isend of j-structure */
4882   /*--------------------------------*/
4883   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4884 
4885   for (proc=0, k=0; proc<size; proc++) {
4886     if (!len_s[proc]) continue;
4887     i    = owners[proc];
4888     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4889     k++;
4890   }
4891 
4892   /* receives and sends of j-structure are complete */
4893   /*------------------------------------------------*/
4894   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4895   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4896 
4897   /* send and recv i-structure */
4898   /*---------------------------*/
4899   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4900   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4901 
4902   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4903   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4904   for (proc=0,k=0; proc<size; proc++) {
4905     if (!len_s[proc]) continue;
4906     /* form outgoing message for i-structure:
4907          buf_si[0]:                 nrows to be sent
4908                [1:nrows]:           row index (local to the destination process)
4909                [nrows+1:2*nrows+1]: i-structure index
4910     */
4911     /*-------------------------------------------*/
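    /* Illustrative example (hypothetical data): if this rank sends two nonempty rows with
       local indices 1 and 3 (relative to owners[proc]) holding 3 and 2 nonzeros respectively,
       then buf_si = {2, 1, 3, 0, 3, 5}: the leading row count, the two row indices, and the
       cumulative i-structure {0,3,5}. */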
4912     nrows       = len_si[proc]/2 - 1;
4913     buf_si_i    = buf_si + nrows+1;
4914     buf_si[0]   = nrows;
4915     buf_si_i[0] = 0;
4916     nrows       = 0;
4917     for (i=owners[proc]; i<owners[proc+1]; i++) {
4918       anzi = ai[i+1] - ai[i];
4919       if (anzi) {
4920         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4921         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4922         nrows++;
4923       }
4924     }
4925     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4926     k++;
4927     buf_si += len_si[proc];
4928   }
4929 
4930   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4931   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4932 
4933   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4934   for (i=0; i<merge->nrecv; i++) {
4935     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4936   }
4937 
4938   ierr = PetscFree(len_si);CHKERRQ(ierr);
4939   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4940   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4941   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4942   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4943   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4944   ierr = PetscFree(status);CHKERRQ(ierr);
4945 
4946   /* compute a local seq matrix in each processor */
4947   /*----------------------------------------------*/
4948   /* allocate bi array and free space for accumulating nonzero column info */
4949   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4950   bi[0] = 0;
4951 
4952   /* create and initialize a linked list */
4953   nlnk = N+1;
4954   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4955 
4956   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4957   len  = ai[owners[rank+1]] - ai[owners[rank]];
4958   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4959 
4960   current_space = free_space;
4961 
4962   /* determine symbolic info for each local row */
4963   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4964 
4965   for (k=0; k<merge->nrecv; k++) {
4966     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4967     nrows       = *buf_ri_k[k];
4968     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4969     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure  */
4970   }
4971 
4972   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4973   len  = 0;
4974   for (i=0; i<m; i++) {
4975     bnzi = 0;
4976     /* add local non-zero cols of this proc's seqmat into lnk */
4977     arow  = owners[rank] + i;
4978     anzi  = ai[arow+1] - ai[arow];
4979     aj    = a->j + ai[arow];
4980     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4981     bnzi += nlnk;
4982     /* add received col data into lnk */
4983     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4984       if (i == *nextrow[k]) { /* i-th row */
4985         anzi  = *(nextai[k]+1) - *nextai[k];
4986         aj    = buf_rj[k] + *nextai[k];
4987         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4988         bnzi += nlnk;
4989         nextrow[k]++; nextai[k]++;
4990       }
4991     }
4992     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4993 
4994     /* if free space is not available, make more free space */
4995     if (current_space->local_remaining<bnzi) {
4996       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4997       nspacedouble++;
4998     }
4999     /* copy data into free space, then initialize lnk */
5000     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5001     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5002 
5003     current_space->array           += bnzi;
5004     current_space->local_used      += bnzi;
5005     current_space->local_remaining -= bnzi;
5006 
5007     bi[i+1] = bi[i] + bnzi;
5008   }
5009 
5010   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5011 
5012   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
5013   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5014   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5015 
5016   /* create symbolic parallel matrix B_mpi */
5017   /*---------------------------------------*/
5018   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5019   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5020   if (n==PETSC_DECIDE) {
5021     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5022   } else {
5023     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5024   }
5025   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5026   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5027   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5028   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5029   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5030 
5031   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5032   B_mpi->assembled    = PETSC_FALSE;
5033   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5034   merge->bi           = bi;
5035   merge->bj           = bj;
5036   merge->buf_ri       = buf_ri;
5037   merge->buf_rj       = buf_rj;
5038   merge->coi          = NULL;
5039   merge->coj          = NULL;
5040   merge->owners_co    = NULL;
5041 
5042   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5043 
5044   /* attach the supporting struct to B_mpi for reuse */
5045   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5046   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5047   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5048   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5049   *mpimat = B_mpi;
5050 
5051   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5052   PetscFunctionReturn(0);
5053 }
5054 
5055 #undef __FUNCT__
5056 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
5057 /*@C
5058       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
5059                  matrices from each processor
5060 
5061     Collective on MPI_Comm
5062 
5063    Input Parameters:
5064 +    comm - the communicator the parallel matrix will live on
5065 .    seqmat - the input sequential matrix (one per process)
5066 .    m - number of local rows (or PETSC_DECIDE)
5067 .    n - number of local columns (or PETSC_DECIDE)
5068 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5069 
5070    Output Parameter:
5071 .    mpimat - the parallel matrix generated
5072 
5073     Level: advanced
5074 
5075    Notes:
5076      The dimensions of the sequential matrix in each processor MUST be the same.
5077      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5078      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
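
     A typical call might look like the following sketch (seqmat is assumed to be an
     assembled SEQAIJ matrix of the same global size on every process):

.vb
     Mat C;

     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve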
5079 @*/
5080 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5081 {
5082   PetscErrorCode ierr;
5083   PetscMPIInt    size;
5084 
5085   PetscFunctionBegin;
5086   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5087   if (size == 1) {
5088     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5089     if (scall == MAT_INITIAL_MATRIX) {
5090       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5091     } else {
5092       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5093     }
5094     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5095     PetscFunctionReturn(0);
5096   }
5097   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5098   if (scall == MAT_INITIAL_MATRIX) {
5099     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5100   }
5101   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5102   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5103   PetscFunctionReturn(0);
5104 }
5105 
5106 #undef __FUNCT__
5107 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5108 /*@
5109      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5110           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5111           with MatGetSize()
5112 
5113     Not Collective
5114 
5115    Input Parameters:
5116 +    A - the matrix
5117 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5118 
5119    Output Parameter:
5120 .    A_loc - the local sequential matrix generated
5121 
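   Example usage (a minimal sketch, assuming a parallel matrix A of type MATMPIAIJ):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc as an ordinary SeqAIJ matrix ... */
     /* refresh A_loc after the values (but not the nonzero pattern) of A have changed */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve
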
5122     Level: developer
5123 
5124 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5125 
5126 @*/
5127 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5128 {
5129   PetscErrorCode ierr;
5130   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5131   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5132   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5133   MatScalar      *aa=a->a,*ba=b->a,*cam;
5134   PetscScalar    *ca;
5135   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5136   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5137   PetscBool      match;
5138 
5139   PetscFunctionBegin;
5140   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5141   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5142   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5143   if (scall == MAT_INITIAL_MATRIX) {
5144     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5145     ci[0] = 0;
5146     for (i=0; i<am; i++) {
5147       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5148     }
5149     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5150     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5151     k    = 0;
5152     for (i=0; i<am; i++) {
5153       ncols_o = bi[i+1] - bi[i];
5154       ncols_d = ai[i+1] - ai[i];
5155       /* off-diagonal portion of A */
5156       for (jo=0; jo<ncols_o; jo++) {
5157         col = cmap[*bj];
5158         if (col >= cstart) break;
5159         cj[k]   = col; bj++;
5160         ca[k++] = *ba++;
5161       }
5162       /* diagonal portion of A */
5163       for (j=0; j<ncols_d; j++) {
5164         cj[k]   = cstart + *aj++;
5165         ca[k++] = *aa++;
5166       }
5167       /* off-diagonal portion of A */
5168       for (j=jo; j<ncols_o; j++) {
5169         cj[k]   = cmap[*bj++];
5170         ca[k++] = *ba++;
5171       }
5172     }
5173     /* put together the new matrix */
5174     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5175     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5176     /* Since these are PETSc arrays, change flags to free them as necessary. */
5177     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5178     mat->free_a  = PETSC_TRUE;
5179     mat->free_ij = PETSC_TRUE;
5180     mat->nonew   = 0;
5181   } else if (scall == MAT_REUSE_MATRIX) {
5182     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5183     ci = mat->i; cj = mat->j; cam = mat->a;
5184     for (i=0; i<am; i++) {
5185       /* off-diagonal portion of A */
5186       ncols_o = bi[i+1] - bi[i];
5187       for (jo=0; jo<ncols_o; jo++) {
5188         col = cmap[*bj];
5189         if (col >= cstart) break;
5190         *cam++ = *ba++; bj++;
5191       }
5192       /* diagonal portion of A */
5193       ncols_d = ai[i+1] - ai[i];
5194       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5195       /* off-diagonal portion of A */
5196       for (j=jo; j<ncols_o; j++) {
5197         *cam++ = *ba++; bj++;
5198       }
5199     }
5200   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5201   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5202   PetscFunctionReturn(0);
5203 }
5204 
5205 #undef __FUNCT__
5206 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5207 /*@C
5208      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5209 
5210     Not Collective
5211 
5212    Input Parameters:
5213 +    A - the matrix
5214 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5215 -    row, col - index sets of rows and columns to extract (or NULL)
5216 
5217    Output Parameter:
5218 .    A_loc - the local sequential matrix generated
5219 
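   Example usage (a minimal sketch; passing NULL for row and col extracts all local rows and the nonzero columns):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
     /* ... use A_loc ... */
     MatDestroy(&A_loc);
.ve
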
5220     Level: developer
5221 
5222 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5223 
5224 @*/
5225 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5226 {
5227   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5228   PetscErrorCode ierr;
5229   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5230   IS             isrowa,iscola;
5231   Mat            *aloc;
5232   PetscBool      match;
5233 
5234   PetscFunctionBegin;
5235   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5236   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5237   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5238   if (!row) {
5239     start = A->rmap->rstart; end = A->rmap->rend;
5240     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5241   } else {
5242     isrowa = *row;
5243   }
5244   if (!col) {
5245     start = A->cmap->rstart;
5246     cmap  = a->garray;
5247     nzA   = a->A->cmap->n;
5248     nzB   = a->B->cmap->n;
5249     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5250     ncols = 0;
5251     for (i=0; i<nzB; i++) {
5252       if (cmap[i] < start) idx[ncols++] = cmap[i];
5253       else break;
5254     }
5255     imark = i;
5256     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5257     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5258     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5259   } else {
5260     iscola = *col;
5261   }
5262   if (scall != MAT_INITIAL_MATRIX) {
5263     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5264     aloc[0] = *A_loc;
5265   }
5266   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5267   *A_loc = aloc[0];
5268   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5269   if (!row) {
5270     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5271   }
5272   if (!col) {
5273     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5274   }
5275   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5276   PetscFunctionReturn(0);
5277 }
5278 
5279 #undef __FUNCT__
5280 #define __FUNCT__ "MatGetBrowsOfAcols"
5281 /*@C
5282     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5283 
5284     Collective on Mat
5285 
5286    Input Parameters:
5287 +    A,B - the matrices in mpiaij format
5288 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5289 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5290 
5291    Output Parameters:
5292 +    rowb, colb - index sets of rows and columns of B to extract
5293 -    B_seq - the sequential matrix generated
5294 
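   Example usage (a minimal sketch; rowb, colb and B_seq are created on the first call and reused on later calls):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
     /* ... use B_seq ... */
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
.ve
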
5295     Level: developer
5296 
5297 @*/
5298 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5299 {
5300   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5301   PetscErrorCode ierr;
5302   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5303   IS             isrowb,iscolb;
5304   Mat            *bseq=NULL;
5305 
5306   PetscFunctionBegin;
5307   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5308     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5309   }
5310   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5311 
5312   if (scall == MAT_INITIAL_MATRIX) {
5313     start = A->cmap->rstart;
5314     cmap  = a->garray;
5315     nzA   = a->A->cmap->n;
5316     nzB   = a->B->cmap->n;
5317     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5318     ncols = 0;
5319     for (i=0; i<nzB; i++) {  /* row < local row index */
5320       if (cmap[i] < start) idx[ncols++] = cmap[i];
5321       else break;
5322     }
5323     imark = i;
5324     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5325     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5326     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5327     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5328   } else {
5329     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5330     isrowb  = *rowb; iscolb = *colb;
5331     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5332     bseq[0] = *B_seq;
5333   }
5334   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5335   *B_seq = bseq[0];
5336   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5337   if (!rowb) {
5338     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5339   } else {
5340     *rowb = isrowb;
5341   }
5342   if (!colb) {
5343     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5344   } else {
5345     *colb = iscolb;
5346   }
5347   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5348   PetscFunctionReturn(0);
5349 }
5350 
5351 #undef __FUNCT__
5352 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5353 /*
5354     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5355     of the OFF-DIAGONAL portion of local A
5356 
5357     Collective on Mat
5358 
5359    Input Parameters:
5360 +    A,B - the matrices in mpiaij format
5361 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5362 
5363    Output Parameters:
5364 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5365 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5366 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5367 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5368 
5369     Level: developer
5370 
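   Example usage (a minimal sketch; this private routine is typically driven from the parallel MatMatMult kernels):

     Mat       B_oth;
     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
     /* ... later, when only the numerical values of B have changed ... */
     MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
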
5371 */
5372 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5373 {
5374   VecScatter_MPI_General *gen_to,*gen_from;
5375   PetscErrorCode         ierr;
5376   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5377   Mat_SeqAIJ             *b_oth;
5378   VecScatter             ctx =a->Mvctx;
5379   MPI_Comm               comm;
5380   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5381   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5382   PetscScalar            *rvalues,*svalues;
5383   MatScalar              *b_otha,*bufa,*bufA;
5384   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5385   MPI_Request            *rwaits = NULL,*swaits = NULL;
5386   MPI_Status             *sstatus,rstatus;
5387   PetscMPIInt            jj;
5388   PetscInt               *cols,sbs,rbs;
5389   PetscScalar            *vals;
5390 
5391   PetscFunctionBegin;
5392   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5393   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5394     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5395   }
5396   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5397   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5398 
5399   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5400   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5401   rvalues  = gen_from->values; /* holds the length of receiving row */
5402   svalues  = gen_to->values;   /* holds the length of sending row */
5403   nrecvs   = gen_from->n;
5404   nsends   = gen_to->n;
5405 
5406   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5407   srow    = gen_to->indices;    /* local row index to be sent */
5408   sstarts = gen_to->starts;
5409   sprocs  = gen_to->procs;
5410   sstatus = gen_to->sstatus;
5411   sbs     = gen_to->bs;
5412   rstarts = gen_from->starts;
5413   rprocs  = gen_from->procs;
5414   rbs     = gen_from->bs;
5415 
5416   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5417   if (scall == MAT_INITIAL_MATRIX) {
5418     /* i-array */
5419     /*---------*/
5420     /*  post receives */
5421     for (i=0; i<nrecvs; i++) {
5422       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5423       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5424       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5425     }
5426 
5427     /* pack the outgoing message */
5428     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5429 
5430     sstartsj[0] = 0;
5431     rstartsj[0] = 0;
5432     len         = 0; /* total length of j or a array to be sent */
5433     k           = 0;
5434     for (i=0; i<nsends; i++) {
5435       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5436       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5437       for (j=0; j<nrows; j++) {
5438         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5439         for (l=0; l<sbs; l++) {
5440           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5441 
5442           rowlen[j*sbs+l] = ncols;
5443 
5444           len += ncols;
5445           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5446         }
5447         k++;
5448       }
5449       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5450 
5451       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5452     }
5453     /* recvs and sends of i-array are completed */
5454     i = nrecvs;
5455     while (i--) {
5456       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5457     }
5458     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5459 
5460     /* allocate buffers for sending j and a arrays */
5461     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5462     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5463 
5464     /* create i-array of B_oth */
5465     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5466 
5467     b_othi[0] = 0;
5468     len       = 0; /* total length of j or a array to be received */
5469     k         = 0;
5470     for (i=0; i<nrecvs; i++) {
5471       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5472       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5473       for (j=0; j<nrows; j++) {
5474         b_othi[k+1] = b_othi[k] + rowlen[j];
5475         len        += rowlen[j]; k++;
5476       }
5477       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5478     }
5479 
5480     /* allocate space for j and a arrays of B_oth */
5481     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5482     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5483 
5484     /* j-array */
5485     /*---------*/
5486     /*  post receives of j-array */
5487     for (i=0; i<nrecvs; i++) {
5488       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5489       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5490     }
5491 
5492     /* pack the outgoing message j-array */
5493     k = 0;
5494     for (i=0; i<nsends; i++) {
5495       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5496       bufJ  = bufj+sstartsj[i];
5497       for (j=0; j<nrows; j++) {
5498         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5499         for (ll=0; ll<sbs; ll++) {
5500           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5501           for (l=0; l<ncols; l++) {
5502             *bufJ++ = cols[l];
5503           }
5504           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5505         }
5506       }
5507       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5508     }
5509 
5510     /* recvs and sends of j-array are completed */
5511     i = nrecvs;
5512     while (i--) {
5513       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5514     }
5515     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5516   } else if (scall == MAT_REUSE_MATRIX) {
5517     sstartsj = *startsj_s;
5518     rstartsj = *startsj_r;
5519     bufa     = *bufa_ptr;
5520     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5521     b_otha   = b_oth->a;
5522   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse value: must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5523 
5524   /* a-array */
5525   /*---------*/
5526   /*  post receives of a-array */
5527   for (i=0; i<nrecvs; i++) {
5528     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5529     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5530   }
5531 
5532   /* pack the outgoing message a-array */
5533   k = 0;
5534   for (i=0; i<nsends; i++) {
5535     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5536     bufA  = bufa+sstartsj[i];
5537     for (j=0; j<nrows; j++) {
5538       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5539       for (ll=0; ll<sbs; ll++) {
5540         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5541         for (l=0; l<ncols; l++) {
5542           *bufA++ = vals[l];
5543         }
5544         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5545       }
5546     }
5547     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5548   }
5549   /* recvs and sends of a-array are completed */
5550   i = nrecvs;
5551   while (i--) {
5552     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5553   }
5554   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5555   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5556 
5557   if (scall == MAT_INITIAL_MATRIX) {
5558     /* put together the new matrix */
5559     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5560 
5561     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5562     /* Since these are PETSc arrays, change flags to free them as necessary. */
5563     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5564     b_oth->free_a  = PETSC_TRUE;
5565     b_oth->free_ij = PETSC_TRUE;
5566     b_oth->nonew   = 0;
5567 
5568     ierr = PetscFree(bufj);CHKERRQ(ierr);
5569     if (!startsj_s || !bufa_ptr) {
5570       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5571       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5572     } else {
5573       *startsj_s = sstartsj;
5574       *startsj_r = rstartsj;
5575       *bufa_ptr  = bufa;
5576     }
5577   }
5578   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5579   PetscFunctionReturn(0);
5580 }
5581 
5582 #undef __FUNCT__
5583 #define __FUNCT__ "MatGetCommunicationStructs"
5584 /*@C
5585   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5586 
5587   Not Collective
5588 
5589   Input Parameter:
5590 . A - The matrix in mpiaij format
5591 
5592   Output Parameters:
5593 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5594 . colmap - A map from global column index to local index into lvec
5595 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5596 
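  Example usage (a minimal sketch; the returned objects are owned by the matrix A and must not be destroyed by the caller):
.vb
    Vec        lvec;
    VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
.ve
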
5597   Level: developer
5598 
5599 @*/
5600 #if defined(PETSC_USE_CTABLE)
5601 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5602 #else
5603 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5604 #endif
5605 {
5606   Mat_MPIAIJ *a;
5607 
5608   PetscFunctionBegin;
5609   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5610   PetscValidPointer(lvec, 2);
5611   PetscValidPointer(colmap, 3);
5612   PetscValidPointer(multScatter, 4);
5613   a = (Mat_MPIAIJ*) A->data;
5614   if (lvec) *lvec = a->lvec;
5615   if (colmap) *colmap = a->colmap;
5616   if (multScatter) *multScatter = a->Mvctx;
5617   PetscFunctionReturn(0);
5618 }
5619 
5620 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5621 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5622 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5623 
5624 #undef __FUNCT__
5625 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5626 /*
5627     Computes C = A*B as (B'*A')', since computing A*B directly is untenable
5628 
5629                n                       p                          p
5630         (              )       (              )         (                  )
5631       m (      A       )  *  n (       B      )   =   m (         C        )
5632         (              )       (              )         (                  )
5633 
5634 */
5635 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5636 {
5637   PetscErrorCode ierr;
5638   Mat            At,Bt,Ct;
5639 
5640   PetscFunctionBegin;
5641   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5642   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5643   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5644   ierr = MatDestroy(&At);CHKERRQ(ierr);
5645   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5646   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5647   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5648   PetscFunctionReturn(0);
5649 }
5650 
5651 #undef __FUNCT__
5652 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5653 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5654 {
5655   PetscErrorCode ierr;
5656   PetscInt       m=A->rmap->n,n=B->cmap->n;
5657   Mat            Cmat;
5658 
5659   PetscFunctionBegin;
5660   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5661   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5662   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5663   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5664   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5665   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5666   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5667   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5668 
5669   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5670 
5671   *C = Cmat;
5672   PetscFunctionReturn(0);
5673 }
5674 
5675 /* ----------------------------------------------------------------*/
5676 #undef __FUNCT__
5677 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5678 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5679 {
5680   PetscErrorCode ierr;
5681 
5682   PetscFunctionBegin;
5683   if (scall == MAT_INITIAL_MATRIX) {
5684     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5685     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5686     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5687   }
5688   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5689   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5690   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5691   PetscFunctionReturn(0);
5692 }
5693 
5694 #if defined(PETSC_HAVE_MUMPS)
5695 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5696 #endif
5697 #if defined(PETSC_HAVE_PASTIX)
5698 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5699 #endif
5700 #if defined(PETSC_HAVE_SUPERLU_DIST)
5701 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5702 #endif
5703 #if defined(PETSC_HAVE_CLIQUE)
5704 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5705 #endif
5706 
5707 /*MC
5708    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5709 
5710    Options Database Keys:
5711 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5712 
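  Example usage (a minimal sketch; m, n, M, N and the preallocation counts d_nz, o_nz are placeholders):
.vb
    MatCreate(PETSC_COMM_WORLD,&A);
    MatSetSizes(A,m,n,M,N);
    MatSetType(A,MATMPIAIJ);
    MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
.ve
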
5713   Level: beginner
5714 
5715 .seealso: MatCreateAIJ()
5716 M*/
5717 
5718 #undef __FUNCT__
5719 #define __FUNCT__ "MatCreate_MPIAIJ"
5720 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5721 {
5722   Mat_MPIAIJ     *b;
5723   PetscErrorCode ierr;
5724   PetscMPIInt    size;
5725 
5726   PetscFunctionBegin;
5727   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5728 
5729   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5730   B->data       = (void*)b;
5731   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5732   B->assembled  = PETSC_FALSE;
5733   B->insertmode = NOT_SET_VALUES;
5734   b->size       = size;
5735 
5736   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5737 
5738   /* build cache (stash) for off-process entries formed during assembly */
5739   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5740 
5741   b->donotstash  = PETSC_FALSE;
5742   b->colmap      = 0;
5743   b->garray      = 0;
5744   b->roworiented = PETSC_TRUE;
5745 
5746   /* stuff used for matrix vector multiply */
5747   b->lvec  = NULL;
5748   b->Mvctx = NULL;
5749 
5750   /* stuff for MatGetRow() */
5751   b->rowindices   = 0;
5752   b->rowvalues    = 0;
5753   b->getrowactive = PETSC_FALSE;
5754 
5755   /* flexible pointer used in CUSP/CUSPARSE classes */
5756   b->spptr = NULL;
5757 
5758 #if defined(PETSC_HAVE_MUMPS)
5759   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5760 #endif
5761 #if defined(PETSC_HAVE_PASTIX)
5762   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5763 #endif
5764 #if defined(PETSC_HAVE_SUPERLU_DIST)
5765   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5766 #endif
5767 #if defined(PETSC_HAVE_CLIQUE)
5768   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5769 #endif
5770   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5771   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5772   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5773   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5774   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5775   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5776   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5777   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5778   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5779   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5780   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5781   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5782   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5783   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5784   PetscFunctionReturn(0);
5785 }
5786 
5787 #undef __FUNCT__
5788 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5789 /*@
5790      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5791          and "off-diagonal" part of the matrix in CSR format.
5792 
5793    Collective on MPI_Comm
5794 
5795    Input Parameters:
5796 +  comm - MPI communicator
5797 .  m - number of local rows (Cannot be PETSC_DECIDE)
5798 .  n - This value should be the same as the local size used in creating the
5799        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5800        calculated if N is given). For square matrices n is almost always m.
5801 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5802 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5803 .   i - row indices for "diagonal" portion of matrix
5804 .   j - column indices
5805 .   a - matrix values
5806 .   oi - row indices for "off-diagonal" portion of matrix
5807 .   oj - column indices
5808 -   oa - matrix values
5809 
5810    Output Parameter:
5811 .   mat - the matrix
5812 
5813    Level: advanced
5814 
5815    Notes:
5816        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5817        must free the arrays once the matrix has been destroyed and not before.
5818 
5819        The i and j indices are 0 based
5820 
5821        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5822 
5823        This sets local rows and cannot be used to set off-processor values.
5824 
5825        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5826        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5827        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5828        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5829        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5830        communication if it is known that only local entries will be set.
5831 
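       Example usage (a minimal sketch on two MPI processes, each owning one row of the 2x2 matrix
       [1 2; 4 3]; it assumes the off-diagonal column indices in oj are given as global indices):
.vb
     PetscInt    i[]  = {0,1},j[]  = {0},oi[] = {0,1},oj[1];
     PetscScalar a[1],oa[1];
     if (!rank) {a[0] = 1.0; oj[0] = 1; oa[0] = 2.0;}  /* row 0: diagonal (0,0)=1, off-diagonal (0,1)=2 */
     else       {a[0] = 3.0; oj[0] = 0; oa[0] = 4.0;}  /* row 1: diagonal (1,1)=3, off-diagonal (1,0)=4 */
     MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);
.ve
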
5832 .keywords: matrix, aij, compressed row, sparse, parallel
5833 
5834 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5835           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5836 @*/
5837 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5838 {
5839   PetscErrorCode ierr;
5840   Mat_MPIAIJ     *maij;
5841 
5842   PetscFunctionBegin;
5843   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5844   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5845   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5846   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5847   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5848   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5849   maij = (Mat_MPIAIJ*) (*mat)->data;
5850 
5851   (*mat)->preallocated = PETSC_TRUE;
5852 
5853   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5854   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5855 
5856   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5857   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5858 
5859   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5860   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5861   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5862   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5863 
5864   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5865   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5866   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5867   PetscFunctionReturn(0);
5868 }
5869 
5870 /*
5871     Special version for direct calls from Fortran
5872 */
5873 #include <petsc-private/fortranimpl.h>
5874 
5875 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5876 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5877 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5878 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5879 #endif
5880 
5881 /* Change these macros so they can be used in a void function */
5882 #undef CHKERRQ
5883 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5884 #undef SETERRQ2
5885 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5886 #undef SETERRQ3
5887 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5888 #undef SETERRQ
5889 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5890 
5891 #undef __FUNCT__
5892 #define __FUNCT__ "matsetvaluesmpiaij_"
5893 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5894 {
5895   Mat            mat  = *mmat;
5896   PetscInt       m    = *mm, n = *mn;
5897   InsertMode     addv = *maddv;
5898   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5899   PetscScalar    value;
5900   PetscErrorCode ierr;
5901 
5902   MatCheckPreallocated(mat,1);
5903   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5904 
5905 #if defined(PETSC_USE_DEBUG)
5906   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5907 #endif
5908   {
5909     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5910     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5911     PetscBool roworiented = aij->roworiented;
5912 
5913     /* Some Variables required in the macro */
5914     Mat        A                 = aij->A;
5915     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5916     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5917     MatScalar  *aa               = a->a;
5918     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5919     Mat        B                 = aij->B;
5920     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5921     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5922     MatScalar  *ba               = b->a;
5923 
5924     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5925     PetscInt  nonew = a->nonew;
5926     MatScalar *ap1,*ap2;
5927 
5928     PetscFunctionBegin;
5929     for (i=0; i<m; i++) {
5930       if (im[i] < 0) continue;
5931 #if defined(PETSC_USE_DEBUG)
5932       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5933 #endif
5934       if (im[i] >= rstart && im[i] < rend) {
5935         row      = im[i] - rstart;
5936         lastcol1 = -1;
5937         rp1      = aj + ai[row];
5938         ap1      = aa + ai[row];
5939         rmax1    = aimax[row];
5940         nrow1    = ailen[row];
5941         low1     = 0;
5942         high1    = nrow1;
5943         lastcol2 = -1;
5944         rp2      = bj + bi[row];
5945         ap2      = ba + bi[row];
5946         rmax2    = bimax[row];
5947         nrow2    = bilen[row];
5948         low2     = 0;
5949         high2    = nrow2;
5950 
5951         for (j=0; j<n; j++) {
5952           if (roworiented) value = v[i*n+j];
5953           else value = v[i+j*m];
5954           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5955           if (in[j] >= cstart && in[j] < cend) {
5956             col = in[j] - cstart;
5957             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5958           } else if (in[j] < 0) continue;
5959 #if defined(PETSC_USE_DEBUG)
5960           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5961 #endif
5962           else {
5963             if (mat->was_assembled) {
5964               if (!aij->colmap) {
5965                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5966               }
5967 #if defined(PETSC_USE_CTABLE)
5968               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5969               col--;
5970 #else
5971               col = aij->colmap[in[j]] - 1;
5972 #endif
5973               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5974                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5975                 col  =  in[j];
5976                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5977                 B     = aij->B;
5978                 b     = (Mat_SeqAIJ*)B->data;
5979                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5980                 rp2   = bj + bi[row];
5981                 ap2   = ba + bi[row];
5982                 rmax2 = bimax[row];
5983                 nrow2 = bilen[row];
5984                 low2  = 0;
5985                 high2 = nrow2;
5986                 bm    = aij->B->rmap->n;
5987                 ba    = b->a;
5988               }
5989             } else col = in[j];
5990             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5991           }
5992         }
5993       } else if (!aij->donotstash) {
5994         if (roworiented) {
5995           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5996         } else {
5997           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5998         }
5999       }
6000     }
6001   }
6002   PetscFunctionReturnVoid();
6003 }
6004 
6005