xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision b28073d3b1e033da78b92bf8215c324cf8d5e643)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also automatically switches over to using
20    inodes when enough of them exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25 M*/
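/*
   Example usage, a minimal sketch (the communicator, sizes, and nonzero estimates below are
   illustrative only):

      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(A,MATAIJ);                          or -mat_type aij via MatSetFromOptions()
      MatSeqAIJSetPreallocation(A,5,NULL);           takes effect on a one-process communicator
      MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);    takes effect on a multi-process communicator
      ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...

   Calling both preallocation routines, as recommended above, keeps user code independent of the
   number of processes it is run on.
*/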
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
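/*
   A short sketch of selecting the CRL variant at run time (the program name below is
   illustrative only): call MatSetFromOptions(A) after MatSetSizes() and run with

      ./myprogram -mat_type aijcrl

   preallocating through the usual MatSeqAIJSetPreallocation()/MatMPIAIJSetPreallocation() calls.
*/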
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix (provided on process 0) across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices.
180 
181     Used by a preconditioner, hence PETSC_EXTERN.
182 */
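/*
   A rough usage sketch (variable names are illustrative only): every process passes the local
   number of rows m it is to own, and only process 0 needs to supply the sequential matrix.

      Mat gmat = NULL,pmat;
      if (!rank) { assemble the square MATSEQAIJ matrix gmat on process 0 }
      MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&pmat);
      later, with the same nonzero pattern, move over fresh numerical values only
      MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&pmat);
*/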
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     if (!rank) {
206       bses[0] = gmat->rmap->bs;
207       bses[1] = gmat->cmap->bs;
208     }
209     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
210     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
211     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
212     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
213     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
214     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
215 
216     rowners[0] = 0;
217     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
218     rstart = rowners[rank];
219     rend   = rowners[rank+1];
220     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
221     if (!rank) {
222       gmata = (Mat_SeqAIJ*) gmat->data;
223       /* send each process its row lengths */
224       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
225       for (i=1; i<size; i++) {
226         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
227       }
228       /* determine the diagonal and off-diagonal nonzero counts for each row */
229       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
230       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
231       jj   = 0;
232       for (i=0; i<m; i++) {
233         for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236           jj++;
237         }
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the diagonal and off-diagonal nonzero counts for each row */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
264       jj   = 0;
265       for (i=0; i<m; i++) {
266         for (j=0; j<dlens[i]; j++) {
267           if (gmataj[jj] < rstart) ld[i]++;
268           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
269           jj++;
270         }
271       }
272       /* receive numerical values */
273       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
274       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
275     }
276     /* set preallocation */
277     for (i=0; i<m; i++) {
278       dlens[i] -= olens[i];
279     }
280     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
281     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
282 
283     for (i=0; i<m; i++) {
284       dlens[i] += olens[i];
285     }
286     cnt = 0;
287     for (i=0; i<m; i++) {
288       row  = rstart + i;
289       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
290       cnt += dlens[i];
291     }
292     if (rank) {
293       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
294     }
295     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
296     ierr = PetscFree(rowners);CHKERRQ(ierr);
297 
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299 
300     *inmat = mat;
301   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
302     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
303     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
304     mat  = *inmat;
305     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
306     if (!rank) {
307       /* send numerical values to other processes */
308       gmata  = (Mat_SeqAIJ*) gmat->data;
309       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
310       gmataa = gmata->a;
311       for (i=1; i<size; i++) {
312         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
313         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
314       }
315       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
316     } else {
317       /* receive numerical values from process 0 */
318       nz   = Ad->nz + Ao->nz;
319       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
320       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
321     }
322     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
323     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
324     ad = Ad->a;
325     ao = Ao->a;
326     if (mat->rmap->n) {
327       i  = 0;
328       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
329       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
330     }
331     for (i=1; i<mat->rmap->n; i++) {
332       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
333       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
334     }
335     i--;
336     if (mat->rmap->n) {
337       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
338     }
339     if (rank) {
340       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
341     }
342   }
343   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 /*
349   Local utility routine that creates a mapping from the global column
350 number to the local number in the off-diagonal part of the local
351 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
352 a slightly higher hash table cost; without it, it is not scalable (each process
353 stores an order-N integer array) but it is fast to access.
354 */
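/*
   For reference, a lookup of a global column gcol in the off-diagonal block then follows this
   sketch (mirroring MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below); stored values are
   shifted by one so that zero can mean "not present":

      PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
      PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif
      if (lcol < 0) the column gcol does not appear in the off-diagonal block B
*/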
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined(PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++) {
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
373   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
374 #endif
375   PetscFunctionReturn(0);
376 }
377 
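/*
   The two macros below insert or add a single value in the diagonal (A) respectively off-diagonal
   (B) sequential block of the parallel matrix: a short binary search narrows the row, a linear
   scan finds the column, an existing entry is updated in place, and otherwise the tail of the row
   is shifted up to make room for the new nonzero (reallocating through MatSeqXAIJReallocateAIJ()
   when the row is full). They rely on the local variables set up in MatSetValues_MPIAIJ() below.
*/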
378 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
379 { \
380     if (col <= lastcol1)  low1 = 0;     \
381     else                 high1 = nrow1; \
382     lastcol1 = col;\
383     while (high1-low1 > 5) { \
384       t = (low1+high1)/2; \
385       if (rp1[t] > col) high1 = t; \
386       else              low1  = t; \
387     } \
388       for (_i=low1; _i<high1; _i++) { \
389         if (rp1[_i] > col) break; \
390         if (rp1[_i] == col) { \
391           if (addv == ADD_VALUES) ap1[_i] += value;   \
392           else                    ap1[_i] = value; \
393           goto a_noinsert; \
394         } \
395       }  \
396       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
397       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
399       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
400       N = nrow1++ - 1; a->nz++; high1++; \
401       /* shift up all the later entries in this row */ \
402       for (ii=N; ii>=_i; ii--) { \
403         rp1[ii+1] = rp1[ii]; \
404         ap1[ii+1] = ap1[ii]; \
405       } \
406       rp1[_i] = col;  \
407       ap1[_i] = value;  \
408       a_noinsert: ; \
409       ailen[row] = nrow1; \
410 }
411 
412 
413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
414   { \
415     if (col <= lastcol2) low2 = 0;                        \
416     else high2 = nrow2;                                   \
417     lastcol2 = col;                                       \
418     while (high2-low2 > 5) {                              \
419       t = (low2+high2)/2;                                 \
420       if (rp2[t] > col) high2 = t;                        \
421       else             low2  = t;                         \
422     }                                                     \
423     for (_i=low2; _i<high2; _i++) {                       \
424       if (rp2[_i] > col) break;                           \
425       if (rp2[_i] == col) {                               \
426         if (addv == ADD_VALUES) ap2[_i] += value;         \
427         else                    ap2[_i] = value;          \
428         goto b_noinsert;                                  \
429       }                                                   \
430     }                                                     \
431     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
432     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
433     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
434     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
435     N = nrow2++ - 1; b->nz++; high2++;                    \
436     /* shift up all the later entries in this row */      \
437     for (ii=N; ii>=_i; ii--) {                            \
438       rp2[ii+1] = rp2[ii];                                \
439       ap2[ii+1] = ap2[ii];                                \
440     }                                                     \
441     rp2[_i] = col;                                        \
442     ap2[_i] = value;                                      \
443     b_noinsert: ;                                         \
444     bilen[row] = nrow2;                                   \
445   }
446 
447 #undef __FUNCT__
448 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
449 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
450 {
451   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
452   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
453   PetscErrorCode ierr;
454   PetscInt       l,*garray = mat->garray,diag;
455 
456   PetscFunctionBegin;
457   /* code only works for square matrices A */
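  /* v must supply one value for every existing nonzero in this row, ordered by increasing global column */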
458 
459   /* find size of row to the left of the diagonal part */
460   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
461   row  = row - diag;
462   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
463     if (garray[b->j[b->i[row]+l]] > diag) break;
464   }
465   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
466 
467   /* diagonal part */
468   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
469 
470   /* right of diagonal part */
471   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
472   PetscFunctionReturn(0);
473 }
474 
475 #undef __FUNCT__
476 #define __FUNCT__ "MatSetValues_MPIAIJ"
477 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
478 {
479   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
480   PetscScalar    value;
481   PetscErrorCode ierr;
482   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
483   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
484   PetscBool      roworiented = aij->roworiented;
485 
486   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
487   Mat        A                 = aij->A;
488   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
489   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
490   MatScalar  *aa               = a->a;
491   PetscBool  ignorezeroentries = a->ignorezeroentries;
492   Mat        B                 = aij->B;
493   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
494   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
495   MatScalar  *ba               = b->a;
496 
497   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
498   PetscInt  nonew;
499   MatScalar *ap1,*ap2;
500 
501   PetscFunctionBegin;
502   if (v) PetscValidScalarPointer(v,6);
503   for (i=0; i<m; i++) {
504     if (im[i] < 0) continue;
505 #if defined(PETSC_USE_DEBUG)
506     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
507 #endif
508     if (im[i] >= rstart && im[i] < rend) {
509       row      = im[i] - rstart;
510       lastcol1 = -1;
511       rp1      = aj + ai[row];
512       ap1      = aa + ai[row];
513       rmax1    = aimax[row];
514       nrow1    = ailen[row];
515       low1     = 0;
516       high1    = nrow1;
517       lastcol2 = -1;
518       rp2      = bj + bi[row];
519       ap2      = ba + bi[row];
520       rmax2    = bimax[row];
521       nrow2    = bilen[row];
522       low2     = 0;
523       high2    = nrow2;
524 
525       for (j=0; j<n; j++) {
526         if (v) {
527           if (roworiented) value = v[i*n+j];
528           else             value = v[i+j*m];
529         } else value = 0.0;
530         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
531         if (in[j] >= cstart && in[j] < cend) {
532           col   = in[j] - cstart;
533           nonew = a->nonew;
534           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
535         } else if (in[j] < 0) continue;
536 #if defined(PETSC_USE_DEBUG)
537         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
538 #endif
539         else {
540           if (mat->was_assembled) {
541             if (!aij->colmap) {
542               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
543             }
544 #if defined(PETSC_USE_CTABLE)
545             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
546             col--;
547 #else
548             col = aij->colmap[in[j]] - 1;
549 #endif
550             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
551               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
552               col  =  in[j];
553               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
554               B     = aij->B;
555               b     = (Mat_SeqAIJ*)B->data;
556               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
557               rp2   = bj + bi[row];
558               ap2   = ba + bi[row];
559               rmax2 = bimax[row];
560               nrow2 = bilen[row];
561               low2  = 0;
562               high2 = nrow2;
563               bm    = aij->B->rmap->n;
564               ba    = b->a;
565             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
566           } else col = in[j];
567           nonew = b->nonew;
568           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
569         }
570       }
571     } else {
572       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
573       if (!aij->donotstash) {
574         mat->assembled = PETSC_FALSE;
575         if (roworiented) {
576           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
577         } else {
578           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
579         }
580       }
581     }
582   }
583   PetscFunctionReturn(0);
584 }
585 
586 #undef __FUNCT__
587 #define __FUNCT__ "MatGetValues_MPIAIJ"
588 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
589 {
590   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
591   PetscErrorCode ierr;
592   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
593   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
594 
595   PetscFunctionBegin;
596   for (i=0; i<m; i++) {
597     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
598     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
599     if (idxm[i] >= rstart && idxm[i] < rend) {
600       row = idxm[i] - rstart;
601       for (j=0; j<n; j++) {
602         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
603         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
604         if (idxn[j] >= cstart && idxn[j] < cend) {
605           col  = idxn[j] - cstart;
606           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
607         } else {
608           if (!aij->colmap) {
609             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
610           }
611 #if defined(PETSC_USE_CTABLE)
612           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
613           col--;
614 #else
615           col = aij->colmap[idxn[j]] - 1;
616 #endif
617           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
618           else {
619             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
620           }
621         }
622       }
623     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
629 
630 #undef __FUNCT__
631 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
632 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
633 {
634   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
635   PetscErrorCode ierr;
636   PetscInt       nstash,reallocs;
637   InsertMode     addv;
638 
639   PetscFunctionBegin;
640   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
641 
642   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
643   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
644   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
645   mat->insertmode = addv; /* in case this processor had no cache */
646 
647   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
648   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
649   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
656 {
657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
658   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
659   PetscErrorCode ierr;
660   PetscMPIInt    n;
661   PetscInt       i,j,rstart,ncols,flg;
662   PetscInt       *row,*col;
663   PetscBool      other_disassembled;
664   PetscScalar    *val;
665   InsertMode     addv = mat->insertmode;
666 
667   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
668 
669   PetscFunctionBegin;
670   if (!aij->donotstash && !mat->nooffprocentries) {
671     while (1) {
672       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
673       if (!flg) break;
674 
675       for (i=0; i<n; ) {
676         /* Now identify the consecutive vals belonging to the same row */
677         for (j=i,rstart=row[j]; j<n; j++) {
678           if (row[j] != rstart) break;
679         }
680         if (j < n) ncols = j-i;
681         else       ncols = n-i;
682         /* Now assemble all these values with a single function call */
683         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
684 
685         i = j;
686       }
687     }
688     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
689   }
690   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
691   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
692 
693   /* determine if any processor has disassembled, if so we must
694      also disassemble ourselves, in order that we may reassemble. */
695   /*
696      if the nonzero structure of submatrix B cannot change then we know that
697      no processor disassembled, thus we can skip this stuff
698   */
699   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
700     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
701     if (mat->was_assembled && !other_disassembled) {
702       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
703     }
704   }
705   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
706     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
707   }
708   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
709   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
710   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
711 
712   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
713 
714   aij->rowvalues = 0;
715 
716   /* used by MatAXPY() */
717   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
718   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
719 
720   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
721   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
727 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
728 {
729   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
730   PetscErrorCode ierr;
731 
732   PetscFunctionBegin;
733   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
734   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNCT__
739 #define __FUNCT__ "MatZeroRows_MPIAIJ"
740 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
741 {
742   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
743   PetscInt      *owners = A->rmap->range;
744   PetscInt       n      = A->rmap->n;
745   PetscMPIInt    size   = mat->size;
746   PetscSF        sf;
747   PetscInt      *lrows;
748   PetscSFNode   *rrows;
749   PetscInt       lastidx = -1, r, p = 0, len = 0;
750   PetscErrorCode ierr;
751 
752   PetscFunctionBegin;
753   /* Create SF where leaves are input rows and roots are owned rows */
754   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
755   for (r = 0; r < n; ++r) lrows[r] = -1;
756   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
757   for (r = 0; r < N; ++r) {
758     const PetscInt idx   = rows[r];
759     PetscBool      found = PETSC_FALSE;
760     /* Trick for efficient searching for sorted rows */
761     if (lastidx > idx) p = 0;
762     lastidx = idx;
763     for (; p < size; ++p) {
764       if (idx >= owners[p] && idx < owners[p+1]) {
765         rrows[r].rank  = p;
766         rrows[r].index = rows[r] - owners[p];
767         found = PETSC_TRUE;
768         break;
769       }
770     }
771     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
772   }
773   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
774   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
775   /* Collect flags for rows to be zeroed */
776   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
777   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
778   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
779   /* Compress and put in row numbers */
780   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
781   /* fix right hand side if needed */
782   if (x && b) {
783     const PetscScalar *xx;
784     PetscScalar       *bb;
785 
786     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
787     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
788     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
789     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
790     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
791   }
792   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
793   ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
794   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
795     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
796   } else if (diag != 0.0) {
797     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
798     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
799     for (r = 0; r < len; ++r) {
800       const PetscInt row = lrows[r] + A->rmap->rstart;
801       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
802     }
803     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
804     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
805   } else {
806     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
807   }
808   ierr = PetscFree(lrows);CHKERRQ(ierr);
809   PetscFunctionReturn(0);
810 }
811 
812 #undef __FUNCT__
813 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
814 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
815 {
816   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
817   PetscErrorCode    ierr;
818   PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
819   PetscInt          i,j,r,m,p = 0,len = 0;
820   PetscInt          *lrows,*owners = A->rmap->range;
821   PetscSFNode       *rrows;
822   PetscSF           sf;
823   const PetscScalar *xx;
824   PetscScalar       *bb,*mask;
825   Vec               xmask,lmask;
826   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
827   const PetscInt    *aj, *ii,*ridx;
828   PetscScalar       *aa;
829 #if defined(PETSC_DEBUG)
830   PetscBool found = PETSC_FALSE;
831 #endif
832 
833   PetscFunctionBegin;
834   /* Create SF where leaves are input rows and roots are owned rows */
835   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
836   for (r = 0; r < n; ++r) lrows[r] = -1;
837   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
838   for (r = 0; r < N; ++r) {
839     const PetscInt idx   = rows[r];
840     PetscBool      found = PETSC_FALSE;
841     /* Trick for efficient searching for sorted rows */
842     if (lastidx > idx) p = 0;
843     lastidx = idx;
844     for (; p < size; ++p) {
845       if (idx >= owners[p] && idx < owners[p+1]) {
846         rrows[r].rank  = p;
847         rrows[r].index = rows[r] - owners[p];
848         found = PETSC_TRUE;
849         break;
850       }
851     }
852     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
853   }
854   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
855   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
856   /* Collect flags for rows to be zeroed */
857   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
858   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
859   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
860   /* Compress and put in row numbers */
861   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
862   /* zero diagonal part of matrix */
863   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
864   /* handle off diagonal part of matrix */
865   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
866   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
867   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
868   for (i=0; i<len; i++) bb[lrows[i]] = 1;
869   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
870   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
873   if (x) {
874     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
875     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
876     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
877     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
878   }
879   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
880   /* remove zeroed rows of off diagonal matrix */
881   ii = aij->i;
882   for (i=0; i<len; i++) {
883     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
884   }
885   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
886   if (aij->compressedrow.use) {
887     m    = aij->compressedrow.nrows;
888     ii   = aij->compressedrow.i;
889     ridx = aij->compressedrow.rindex;
890     for (i=0; i<m; i++) {
891       n  = ii[i+1] - ii[i];
892       aj = aij->j + ii[i];
893       aa = aij->a + ii[i];
894 
895       for (j=0; j<n; j++) {
896         if (PetscAbsScalar(mask[*aj])) {
897           if (b) bb[*ridx] -= *aa*xx[*aj];
898           *aa = 0.0;
899         }
900         aa++;
901         aj++;
902       }
903       ridx++;
904     }
905   } else { /* do not use compressed row format */
906     m = l->B->rmap->n;
907     for (i=0; i<m; i++) {
908       n  = ii[i+1] - ii[i];
909       aj = aij->j + ii[i];
910       aa = aij->a + ii[i];
911       for (j=0; j<n; j++) {
912         if (PetscAbsScalar(mask[*aj])) {
913           if (b) bb[i] -= *aa*xx[*aj];
914           *aa = 0.0;
915         }
916         aa++;
917         aj++;
918       }
919     }
920   }
921   if (x) {
922     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
923     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
924   }
925   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
926   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
927   ierr = PetscFree(lrows);CHKERRQ(ierr);
928   PetscFunctionReturn(0);
929 }
930 
931 #undef __FUNCT__
932 #define __FUNCT__ "MatMult_MPIAIJ"
933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934 {
935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       nt;
938 
939   PetscFunctionBegin;
940   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
941   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 #undef __FUNCT__
950 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
951 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
952 {
953   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
954   PetscErrorCode ierr;
955 
956   PetscFunctionBegin;
957   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
958   PetscFunctionReturn(0);
959 }
960 
961 #undef __FUNCT__
962 #define __FUNCT__ "MatMultAdd_MPIAIJ"
963 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
964 {
965   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
966   PetscErrorCode ierr;
967 
968   PetscFunctionBegin;
969   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
970   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
971   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
972   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
973   PetscFunctionReturn(0);
974 }
975 
976 #undef __FUNCT__
977 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
978 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
979 {
980   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
981   PetscErrorCode ierr;
982   PetscBool      merged;
983 
984   PetscFunctionBegin;
985   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
986   /* do nondiagonal part */
987   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
988   if (!merged) {
989     /* send it on its way */
990     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
991     /* do local part */
992     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
993     /* receive remote parts: note this assumes the values are not actually */
994     /* added into yy until the VecScatterEnd() on the next line */
995     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
996   } else {
997     /* do local part */
998     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
999     /* send it on its way */
1000     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1001     /* values actually were received in the Begin() but we need to call this nop */
1002     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1003   }
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1009 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1010 {
1011   MPI_Comm       comm;
1012   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1013   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1014   IS             Me,Notme;
1015   PetscErrorCode ierr;
1016   PetscInt       M,N,first,last,*notme,i;
1017   PetscMPIInt    size;
1018 
1019   PetscFunctionBegin;
1020   /* Easy test: symmetric diagonal block */
1021   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1022   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1023   if (!*f) PetscFunctionReturn(0);
1024   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1025   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1026   if (size == 1) PetscFunctionReturn(0);
1027 
1028   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices(). */
1029   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1030   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1031   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1032   for (i=0; i<first; i++) notme[i] = i;
1033   for (i=last; i<M; i++) notme[i-last+first] = i;
1034   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1035   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1036   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1037   Aoff = Aoffs[0];
1038   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1039   Boff = Boffs[0];
1040   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1041   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1042   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1043   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1044   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1045   ierr = PetscFree(notme);CHKERRQ(ierr);
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 #undef __FUNCT__
1050 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1051 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1052 {
1053   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1054   PetscErrorCode ierr;
1055 
1056   PetscFunctionBegin;
1057   /* do nondiagonal part */
1058   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1059   /* send it on its way */
1060   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1061   /* do local part */
1062   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1063   /* receive remote parts */
1064   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 /*
1069   This only works correctly for square matrices where the subblock A->A is the
1070    diagonal block
1071 */
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1074 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1075 {
1076   PetscErrorCode ierr;
1077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1078 
1079   PetscFunctionBegin;
1080   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1081   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1082   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatScale_MPIAIJ"
1088 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1089 {
1090   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1091   PetscErrorCode ierr;
1092 
1093   PetscFunctionBegin;
1094   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1095   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatDestroy_MPIAIJ"
1101 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1102 {
1103   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1104   PetscErrorCode ierr;
1105 
1106   PetscFunctionBegin;
1107 #if defined(PETSC_USE_LOG)
1108   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1109 #endif
1110   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1111   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1112   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1113   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1114 #if defined(PETSC_USE_CTABLE)
1115   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1116 #else
1117   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1118 #endif
1119   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1120   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1121   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1122   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1123   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1124   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1125 
1126   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1133   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1134   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 #undef __FUNCT__
1139 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1140 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1141 {
1142   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1143   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1144   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1145   PetscErrorCode ierr;
1146   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1147   int            fd;
1148   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1149   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1150   PetscScalar    *column_values;
1151   PetscInt       message_count,flowcontrolcount;
1152   FILE           *file;
1153 
1154   PetscFunctionBegin;
1155   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1156   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1157   nz   = A->nz + B->nz;
1158   if (!rank) {
1159     header[0] = MAT_FILE_CLASSID;
1160     header[1] = mat->rmap->N;
1161     header[2] = mat->cmap->N;
1162 
1163     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1164     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1165     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1166     /* get largest number of rows any processor has */
1167     rlen  = mat->rmap->n;
1168     range = mat->rmap->range;
1169     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1170   } else {
1171     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1172     rlen = mat->rmap->n;
1173   }
1174 
1175   /* load up the local row counts */
1176   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1177   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1178 
1179   /* store the row lengths to the file */
1180   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1181   if (!rank) {
1182     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1183     for (i=1; i<size; i++) {
1184       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1185       rlen = range[i+1] - range[i];
1186       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1187       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1188     }
1189     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1190   } else {
1191     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1192     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1193     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1194   }
1195   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1196 
1197   /* load up the local column indices */
1198   nzmax = nz; /* this processor needs space for the largest number of nonzeros on any processor */
1199   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1200   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1201   cnt   = 0;
1202   for (i=0; i<mat->rmap->n; i++) {
1203     for (j=B->i[i]; j<B->i[i+1]; j++) {
1204       if ((col = garray[B->j[j]]) > cstart) break;
1205       column_indices[cnt++] = col;
1206     }
1207     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1208     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1209   }
1210   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1211 
1212   /* store the column indices to the file */
1213   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1214   if (!rank) {
1215     MPI_Status status;
1216     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1217     for (i=1; i<size; i++) {
1218       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1219       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1220       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1221       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1222       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1223     }
1224     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1225   } else {
1226     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1227     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1228     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1229     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1230   }
1231   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1232 
1233   /* load up the local column values */
1234   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1235   cnt  = 0;
1236   for (i=0; i<mat->rmap->n; i++) {
1237     for (j=B->i[i]; j<B->i[i+1]; j++) {
1238       if (garray[B->j[j]] > cstart) break;
1239       column_values[cnt++] = B->a[j];
1240     }
1241     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1242     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1243   }
1244   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1245 
1246   /* store the column values to the file */
1247   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1248   if (!rank) {
1249     MPI_Status status;
1250     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1251     for (i=1; i<size; i++) {
1252       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1253       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1254       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1255       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1256       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1257     }
1258     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1259   } else {
1260     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1261     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1262     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1263     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1264   }
1265   ierr = PetscFree(column_values);CHKERRQ(ierr);
1266 
1267   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1268   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 #include <petscdraw.h>
1273 #undef __FUNCT__
1274 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1275 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1276 {
1277   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1278   PetscErrorCode    ierr;
1279   PetscMPIInt       rank = aij->rank,size = aij->size;
1280   PetscBool         isdraw,iascii,isbinary;
1281   PetscViewer       sviewer;
1282   PetscViewerFormat format;
1283 
1284   PetscFunctionBegin;
1285   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1286   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1287   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1288   if (iascii) {
1289     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1290     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1291       MatInfo   info;
1292       PetscInt  *inodes;
1293 
1294       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1295       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1296       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1297       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1298       if (!inodes) {
1299         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1300                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1301       } else {
1302         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1303                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1304       }
1305       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1306       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1307       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1308       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1309       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1310       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1311       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1312       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1313       PetscFunctionReturn(0);
1314     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1315       PetscInt inodecount,inodelimit,*inodes;
1316       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1317       if (inodes) {
1318         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1319       } else {
1320         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1321       }
1322       PetscFunctionReturn(0);
1323     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1324       PetscFunctionReturn(0);
1325     }
1326   } else if (isbinary) {
1327     if (size == 1) {
1328       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1329       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1330     } else {
1331       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1332     }
1333     PetscFunctionReturn(0);
1334   } else if (isdraw) {
1335     PetscDraw draw;
1336     PetscBool isnull;
1337     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1338     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1339   }
1340 
1341   if (size == 1) {
1342     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1343     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1344   } else {
1345     /* assemble the entire matrix onto the first processor */
1346     Mat        A;
1347     Mat_SeqAIJ *Aloc;
1348     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1349     MatScalar  *a;
1350 
1351     if (mat->rmap->N > 1024) {
1352       PetscBool flg = PETSC_FALSE;
1353 
1354       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1355       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1356     }
1357 
1358     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1359     if (!rank) {
1360       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1361     } else {
1362       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1363     }
1364     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1365     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1366     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1367     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1368     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1369 
1370     /* copy over the A part */
1371     Aloc = (Mat_SeqAIJ*)aij->A->data;
1372     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1373     row  = mat->rmap->rstart;
1374     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1375     for (i=0; i<m; i++) {
1376       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1377       row++;
1378       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1379     }
1380     aj = Aloc->j;
1381     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1382 
1383     /* copy over the B part */
1384     Aloc = (Mat_SeqAIJ*)aij->B->data;
1385     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1386     row  = mat->rmap->rstart;
1387     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1388     ct   = cols;
1389     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1390     for (i=0; i<m; i++) {
1391       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1392       row++;
1393       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1394     }
1395     ierr = PetscFree(ct);CHKERRQ(ierr);
1396     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1397     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1398     /*
1399        Everyone has to participate in the call that draws the matrix since the graphics
1400        waits are synchronized across all processes that share the PetscDraw object
1401     */
1402     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1403     if (!rank) {
1404       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1405       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1406       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1407       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1408     }
1409     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1410     ierr = MatDestroy(&A);CHKERRQ(ierr);
1411   }
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatView_MPIAIJ"
1417 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1418 {
1419   PetscErrorCode ierr;
1420   PetscBool      iascii,isdraw,issocket,isbinary;
1421 
1422   PetscFunctionBegin;
1423   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1424   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1425   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1426   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1427   if (iascii || isdraw || isbinary || issocket) {
1428     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1429   }
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatSOR_MPIAIJ"
1435 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1436 {
1437   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1438   PetscErrorCode ierr;
1439   Vec            bb1 = 0;
1440   PetscBool      hasop;
1441 
1442   PetscFunctionBegin;
1443   if (flag == SOR_APPLY_UPPER) {
1444     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1445     PetscFunctionReturn(0);
1446   }
1447 
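  /* a work vector bb1 is needed whenever more than one iteration is run, the initial guess may be
     nonzero, or Eisenstat's trick is used, since the local right-hand side must be recomputed */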
1448   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1449     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1450   }
1451 
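  /* each local variant below repeats the same pattern: scatter the current solution into the
     ghost vector lvec, form bb1 = bb - B*lvec so the off-process coupling moves to the
     right-hand side, then apply the corresponding sweep of SOR on the diagonal block A */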
1452   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1453     if (flag & SOR_ZERO_INITIAL_GUESS) {
1454       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1455       its--;
1456     }
1457 
1458     while (its--) {
1459       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1460       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1461 
1462       /* update rhs: bb1 = bb - B*x */
1463       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1464       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1465 
1466       /* local sweep */
1467       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1468     }
1469   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1470     if (flag & SOR_ZERO_INITIAL_GUESS) {
1471       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1472       its--;
1473     }
1474     while (its--) {
1475       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1476       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1477 
1478       /* update rhs: bb1 = bb - B*x */
1479       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1480       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1481 
1482       /* local sweep */
1483       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1484     }
1485   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1486     if (flag & SOR_ZERO_INITIAL_GUESS) {
1487       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1488       its--;
1489     }
1490     while (its--) {
1491       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1492       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1493 
1494       /* update rhs: bb1 = bb - B*x */
1495       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1496       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1497 
1498       /* local sweep */
1499       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1500     }
1501   } else if (flag & SOR_EISENSTAT) {
1502     Vec xx1;
1503 
1504     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
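    /* Eisenstat's trick: a backward local sweep with zero initial guess gives xx; xx is scattered
       to the ghost vector, combined with the diagonal as bb1 = bb + ((omega-2)/omega)*D*xx + B*lvec,
       and a forward local sweep on bb1 produces the correction xx1 that is added to xx */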
1505     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1506 
1507     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1508     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1509     if (!mat->diag) {
1510       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1511       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1512     }
1513     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1514     if (hasop) {
1515       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1516     } else {
1517       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1518     }
1519     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1520 
1521     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1522 
1523     /* local sweep */
1524     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1525     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1526     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1527   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1528 
1529   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 #undef __FUNCT__
1534 #define __FUNCT__ "MatPermute_MPIAIJ"
1535 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1536 {
1537   Mat            aA,aB,Aperm;
1538   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1539   PetscScalar    *aa,*ba;
1540   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1541   PetscSF        rowsf,sf;
1542   IS             parcolp = NULL;
1543   PetscBool      done;
1544   PetscErrorCode ierr;
1545 
1546   PetscFunctionBegin;
1547   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1548   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1549   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1550   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1551 
1552   /* Invert row permutation to find out where my rows should go */
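  /* rwant[i] is the original global row that should become local row i of the permuted matrix;
     each leaf contributes its new global index (rstart+i), so after the reduce rdest[k] holds
     the destination row of local row k */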
1553   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1554   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1555   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1556   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1557   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1558   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1559 
1560   /* Invert column permutation to find out where my columns should go */
1561   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1562   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1563   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1564   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1565   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1566   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1567   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1568 
1569   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1570   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1571   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1572 
1573   /* Find out where my gcols should go */
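  /* broadcast cdest through the SF so that every ghost column in gcols learns the new global
     index of the column it refers to, leaving the translated indices in gcdest */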
1574   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1575   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1576   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1577   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1578   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1579   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1580   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1581   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1582 
1583   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1584   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1585   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
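  /* count how many entries of each local row land in the diagonal block of the permuted matrix
     (destination row and column owned by the same process) versus the off-diagonal block */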
1586   for (i=0; i<m; i++) {
1587     PetscInt row = rdest[i],rowner;
1588     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1589     for (j=ai[i]; j<ai[i+1]; j++) {
1590       PetscInt cowner,col = cdest[aj[j]];
1591       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1592       if (rowner == cowner) dnnz[i]++;
1593       else onnz[i]++;
1594     }
1595     for (j=bi[i]; j<bi[i+1]; j++) {
1596       PetscInt cowner,col = gcdest[bj[j]];
1597       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1598       if (rowner == cowner) dnnz[i]++;
1599       else onnz[i]++;
1600     }
1601   }
1602   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1603   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1604   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1605   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1606   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1607 
1608   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1609   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1610   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1611   for (i=0; i<m; i++) {
1612     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1613     PetscInt j0,rowlen;
1614     rowlen = ai[i+1] - ai[i];
1615     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed the number of local rows m (the length of the scratch arrays), so insert in batches of at most m */
1616       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1617       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1618     }
1619     rowlen = bi[i+1] - bi[i];
1620     for (j0=j=0; j<rowlen; j0=j) {
1621       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1622       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1623     }
1624   }
1625   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1626   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1627   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1628   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1629   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1630   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1631   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1632   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1633   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1634   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1635   *B = Aperm;
1636   PetscFunctionReturn(0);
1637 }
1638 
1639 #undef __FUNCT__
1640 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1641 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1642 {
1643   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1644   Mat            A    = mat->A,B = mat->B;
1645   PetscErrorCode ierr;
1646   PetscReal      isend[5],irecv[5];
1647 
1648   PetscFunctionBegin;
1649   info->block_size = 1.0;
1650   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1651 
1652   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1653   isend[3] = info->memory;  isend[4] = info->mallocs;
1654 
1655   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1656 
1657   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1658   isend[3] += info->memory;  isend[4] += info->mallocs;
1659   if (flag == MAT_LOCAL) {
1660     info->nz_used      = isend[0];
1661     info->nz_allocated = isend[1];
1662     info->nz_unneeded  = isend[2];
1663     info->memory       = isend[3];
1664     info->mallocs      = isend[4];
1665   } else if (flag == MAT_GLOBAL_MAX) {
1666     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1667 
1668     info->nz_used      = irecv[0];
1669     info->nz_allocated = irecv[1];
1670     info->nz_unneeded  = irecv[2];
1671     info->memory       = irecv[3];
1672     info->mallocs      = irecv[4];
1673   } else if (flag == MAT_GLOBAL_SUM) {
1674     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1675 
1676     info->nz_used      = irecv[0];
1677     info->nz_allocated = irecv[1];
1678     info->nz_unneeded  = irecv[2];
1679     info->memory       = irecv[3];
1680     info->mallocs      = irecv[4];
1681   }
1682   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1683   info->fill_ratio_needed = 0;
1684   info->factor_mallocs    = 0;
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatSetOption_MPIAIJ"
1690 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1691 {
1692   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1693   PetscErrorCode ierr;
1694 
1695   PetscFunctionBegin;
1696   switch (op) {
1697   case MAT_NEW_NONZERO_LOCATIONS:
1698   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1699   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1700   case MAT_KEEP_NONZERO_PATTERN:
1701   case MAT_NEW_NONZERO_LOCATION_ERR:
1702   case MAT_USE_INODES:
1703   case MAT_IGNORE_ZERO_ENTRIES:
1704     MatCheckPreallocated(A,1);
1705     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1706     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1707     break;
1708   case MAT_ROW_ORIENTED:
1709     a->roworiented = flg;
1710 
1711     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1712     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1713     break;
1714   case MAT_NEW_DIAGONALS:
1715     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1716     break;
1717   case MAT_IGNORE_OFF_PROC_ENTRIES:
1718     a->donotstash = flg;
1719     break;
1720   case MAT_SPD:
1721     A->spd_set = PETSC_TRUE;
1722     A->spd     = flg;
1723     if (flg) {
1724       A->symmetric                  = PETSC_TRUE;
1725       A->structurally_symmetric     = PETSC_TRUE;
1726       A->symmetric_set              = PETSC_TRUE;
1727       A->structurally_symmetric_set = PETSC_TRUE;
1728     }
1729     break;
1730   case MAT_SYMMETRIC:
1731     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1732     break;
1733   case MAT_STRUCTURALLY_SYMMETRIC:
1734     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1735     break;
1736   case MAT_HERMITIAN:
1737     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1738     break;
1739   case MAT_SYMMETRY_ETERNAL:
1740     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1741     break;
1742   default:
1743     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1744   }
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 #undef __FUNCT__
1749 #define __FUNCT__ "MatGetRow_MPIAIJ"
1750 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1751 {
1752   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1753   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1754   PetscErrorCode ierr;
1755   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1756   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1757   PetscInt       *cmap,*idx_p;
1758 
1759   PetscFunctionBegin;
1760   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1761   mat->getrowactive = PETSC_TRUE;
1762 
1763   if (!mat->rowvalues && (idx || v)) {
1764     /*
1765         allocate enough space to hold information from the longest row.
1766     */
1767     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1768     PetscInt   max = 1,tmp;
1769     for (i=0; i<matin->rmap->n; i++) {
1770       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1771       if (max < tmp) max = tmp;
1772     }
1773     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1774   }
1775 
1776   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1777   lrow = row - rstart;
1778 
1779   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1780   if (!v)   {pvA = 0; pvB = 0;}
1781   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1782   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1783   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1784   nztot = nzA + nzB;
1785 
1786   cmap = mat->garray;
1787   if (v  || idx) {
1788     if (nztot) {
1789       /* Sort by increasing column numbers, assuming A and B already sorted */
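      /* imark counts the B (off-process) entries whose global column lies left of the diagonal
         block; the merged row is then [B-left | A | B-right] in increasing column order */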
1790       PetscInt imark = -1;
1791       if (v) {
1792         *v = v_p = mat->rowvalues;
1793         for (i=0; i<nzB; i++) {
1794           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1795           else break;
1796         }
1797         imark = i;
1798         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1799         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1800       }
1801       if (idx) {
1802         *idx = idx_p = mat->rowindices;
1803         if (imark > -1) {
1804           for (i=0; i<imark; i++) {
1805             idx_p[i] = cmap[cworkB[i]];
1806           }
1807         } else {
1808           for (i=0; i<nzB; i++) {
1809             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1810             else break;
1811           }
1812           imark = i;
1813         }
1814         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1815         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1816       }
1817     } else {
1818       if (idx) *idx = 0;
1819       if (v)   *v   = 0;
1820     }
1821   }
1822   *nz  = nztot;
1823   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1824   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1825   PetscFunctionReturn(0);
1826 }
1827 
1828 #undef __FUNCT__
1829 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1830 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1831 {
1832   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1833 
1834   PetscFunctionBegin;
1835   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1836   aij->getrowactive = PETSC_FALSE;
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatNorm_MPIAIJ"
1842 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1843 {
1844   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1845   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1846   PetscErrorCode ierr;
1847   PetscInt       i,j,cstart = mat->cmap->rstart;
1848   PetscReal      sum = 0.0;
1849   MatScalar      *v;
1850 
1851   PetscFunctionBegin;
1852   if (aij->size == 1) {
1853     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1854   } else {
1855     if (type == NORM_FROBENIUS) {
1856       v = amat->a;
1857       for (i=0; i<amat->nz; i++) {
1858         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1859       }
1860       v = bmat->a;
1861       for (i=0; i<bmat->nz; i++) {
1862         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1863       }
1864       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1865       *norm = PetscSqrtReal(*norm);
1866     } else if (type == NORM_1) { /* max column norm */
1867       PetscReal *tmp,*tmp2;
1868       PetscInt  *jj,*garray = aij->garray;
1869       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1870       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1871       *norm = 0.0;
1872       v     = amat->a; jj = amat->j;
1873       for (j=0; j<amat->nz; j++) {
1874         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1875       }
1876       v = bmat->a; jj = bmat->j;
1877       for (j=0; j<bmat->nz; j++) {
1878         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1879       }
1880       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1881       for (j=0; j<mat->cmap->N; j++) {
1882         if (tmp2[j] > *norm) *norm = tmp2[j];
1883       }
1884       ierr = PetscFree(tmp);CHKERRQ(ierr);
1885       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1886     } else if (type == NORM_INFINITY) { /* max row norm */
1887       PetscReal ntemp = 0.0;
1888       for (j=0; j<aij->A->rmap->n; j++) {
1889         v   = amat->a + amat->i[j];
1890         sum = 0.0;
1891         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1892           sum += PetscAbsScalar(*v); v++;
1893         }
1894         v = bmat->a + bmat->i[j];
1895         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1896           sum += PetscAbsScalar(*v); v++;
1897         }
1898         if (sum > ntemp) ntemp = sum;
1899       }
1900       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1901     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1902   }
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 #undef __FUNCT__
1907 #define __FUNCT__ "MatTranspose_MPIAIJ"
1908 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1909 {
1910   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1911   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1912   PetscErrorCode ierr;
1913   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1914   PetscInt       cstart = A->cmap->rstart,ncol;
1915   Mat            B;
1916   MatScalar      *array;
1917 
1918   PetscFunctionBegin;
1919   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"In-place transpose is only supported for square matrices");
1920 
1921   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1922   ai = Aloc->i; aj = Aloc->j;
1923   bi = Bloc->i; bj = Bloc->j;
1924   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1925     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1926     PetscSFNode          *oloc;
1927     PETSC_UNUSED PetscSF sf;
1928 
1929     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1930     /* compute d_nnz for preallocation */
1931     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1932     for (i=0; i<ai[ma]; i++) {
1933       d_nnz[aj[i]]++;
1934       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1935     }
1936     /* compute local off-diagonal contributions */
1937     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1938     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1939     /* map those to global */
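    /* every entry of B in global column c becomes an off-diagonal entry of row c of the
       transpose; the SF reduce sums these per-column counts onto the owners of the columns,
       producing o_nnz for the transposed matrix */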
1940     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1941     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1942     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1943     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1944     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1945     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1946     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1947 
1948     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1949     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1950     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
1951     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1952     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1953     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1954   } else {
1955     B    = *matout;
1956     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1957     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1958   }
1959 
1960   /* copy over the A part */
1961   array = Aloc->a;
1962   row   = A->rmap->rstart;
1963   for (i=0; i<ma; i++) {
1964     ncol = ai[i+1]-ai[i];
1965     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1966     row++;
1967     array += ncol; aj += ncol;
1968   }
1969   aj = Aloc->j;
1970   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1971 
1972   /* copy over the B part */
1973   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1974   array = Bloc->a;
1975   row   = A->rmap->rstart;
1976   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1977   cols_tmp = cols;
1978   for (i=0; i<mb; i++) {
1979     ncol = bi[i+1]-bi[i];
1980     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1981     row++;
1982     array += ncol; cols_tmp += ncol;
1983   }
1984   ierr = PetscFree(cols);CHKERRQ(ierr);
1985 
1986   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1987   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1988   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1989     *matout = B;
1990   } else {
1991     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1992   }
1993   PetscFunctionReturn(0);
1994 }
1995 
1996 #undef __FUNCT__
1997 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1998 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1999 {
2000   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2001   Mat            a    = aij->A,b = aij->B;
2002   PetscErrorCode ierr;
2003   PetscInt       s1,s2,s3;
2004 
2005   PetscFunctionBegin;
2006   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2007   if (rr) {
2008     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2009     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2010     /* Overlap communication with computation. */
2011     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2012   }
2013   if (ll) {
2014     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2015     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2016     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2017   }
2018   /* scale the diagonal block */
2019   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2020 
2021   if (rr) {
2022     /* Do a scatter end and then right scale the off-diagonal block */
2023     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2024     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2025   }
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 #undef __FUNCT__
2030 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2031 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2032 {
2033   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2034   PetscErrorCode ierr;
2035 
2036   PetscFunctionBegin;
2037   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2038   PetscFunctionReturn(0);
2039 }
2040 
2041 #undef __FUNCT__
2042 #define __FUNCT__ "MatEqual_MPIAIJ"
2043 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2044 {
2045   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2046   Mat            a,b,c,d;
2047   PetscBool      flg;
2048   PetscErrorCode ierr;
2049 
2050   PetscFunctionBegin;
2051   a = matA->A; b = matA->B;
2052   c = matB->A; d = matB->B;
2053 
2054   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2055   if (flg) {
2056     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2057   }
2058   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2059   PetscFunctionReturn(0);
2060 }
2061 
2062 #undef __FUNCT__
2063 #define __FUNCT__ "MatCopy_MPIAIJ"
2064 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2065 {
2066   PetscErrorCode ierr;
2067   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2068   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2069 
2070   PetscFunctionBegin;
2071   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2072   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2073     /* Because of the column compression in the off-processor part of the matrix a->B,
2074        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2075        directly on the two parts. If need be, a more efficient copy than MatCopy_Basic()
2076        could be provided by first uncompressing the a->B matrices and then copying the
2077        submatrices */
2078     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2079   } else {
2080     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2081     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2082   }
2083   PetscFunctionReturn(0);
2084 }
2085 
2086 #undef __FUNCT__
2087 #define __FUNCT__ "MatSetUp_MPIAIJ"
2088 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2089 {
2090   PetscErrorCode ierr;
2091 
2092   PetscFunctionBegin;
2093   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 #undef __FUNCT__
2098 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2099 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
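/* nnz[i] is the size of the union of the column sets of row i of X and row i of Y, with the
   columns compared in global numbering through the supplied local-to-global maps */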
2100 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2101 {
2102   PetscInt       i,m=Y->rmap->N;
2103   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2104   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2105   const PetscInt *xi = x->i,*yi = y->i;
2106 
2107   PetscFunctionBegin;
2108   /* Set the number of nonzeros in the new matrix */
2109   for (i=0; i<m; i++) {
2110     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2111     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2112     nnz[i] = 0;
2113     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2114       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2115       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2116       nnz[i]++;
2117     }
2118     for (; k<nzy; k++) nnz[i]++;
2119   }
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 #undef __FUNCT__
2124 #define __FUNCT__ "MatAXPY_MPIAIJ"
2125 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2126 {
2127   PetscErrorCode ierr;
2128   PetscInt       i;
2129   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2130   PetscBLASInt   bnz,one=1;
2131   Mat_SeqAIJ     *x,*y;
2132 
2133   PetscFunctionBegin;
2134   if (str == SAME_NONZERO_PATTERN) {
2135     PetscScalar alpha = a;
2136     x    = (Mat_SeqAIJ*)xx->A->data;
2137     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2138     y    = (Mat_SeqAIJ*)yy->A->data;
2139     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2140     x    = (Mat_SeqAIJ*)xx->B->data;
2141     y    = (Mat_SeqAIJ*)yy->B->data;
2142     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2143     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2144   } else if (str == SUBSET_NONZERO_PATTERN) {
2145     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2146 
2147     x = (Mat_SeqAIJ*)xx->B->data;
2148     y = (Mat_SeqAIJ*)yy->B->data;
2149     if (y->xtoy && y->XtoY != xx->B) {
2150       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2151       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2152     }
2153     if (!y->xtoy) { /* get xtoy */
2154       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2155       y->XtoY = xx->B;
2156       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2157     }
2158     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2159   } else {
2160     Mat      B;
2161     PetscInt *nnz_d,*nnz_o;
2162     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2163     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2164     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2165     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2166     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2167     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2168     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2169     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2170     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2171     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2172     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2173     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2174     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2175     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2176   }
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2181 
2182 #undef __FUNCT__
2183 #define __FUNCT__ "MatConjugate_MPIAIJ"
2184 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2185 {
2186 #if defined(PETSC_USE_COMPLEX)
2187   PetscErrorCode ierr;
2188   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2189 
2190   PetscFunctionBegin;
2191   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2192   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2193 #else
2194   PetscFunctionBegin;
2195 #endif
2196   PetscFunctionReturn(0);
2197 }
2198 
2199 #undef __FUNCT__
2200 #define __FUNCT__ "MatRealPart_MPIAIJ"
2201 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2202 {
2203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2204   PetscErrorCode ierr;
2205 
2206   PetscFunctionBegin;
2207   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2208   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2214 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2215 {
2216   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2217   PetscErrorCode ierr;
2218 
2219   PetscFunctionBegin;
2220   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2221   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2222   PetscFunctionReturn(0);
2223 }
2224 
2225 #if defined(PETSC_HAVE_PBGL)
2226 
2227 #include <boost/parallel/mpi/bsp_process_group.hpp>
2228 #include <boost/graph/distributed/ilu_default_graph.hpp>
2229 #include <boost/graph/distributed/ilu_0_block.hpp>
2230 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2231 #include <boost/graph/distributed/petsc/interface.hpp>
2232 #include <boost/multi_array.hpp>
2233 #include <boost/parallel/distributed_property_map.hpp>
2234 
2235 #undef __FUNCT__
2236 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2237 /*
2238   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2239 */
2240 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2241 {
2242   namespace petsc = boost::distributed::petsc;
2243 
2244   namespace graph_dist = boost::graph::distributed;
2245   using boost::graph::distributed::ilu_default::process_group_type;
2246   using boost::graph::ilu_permuted;
2247 
2248   PetscBool      row_identity, col_identity;
2249   PetscContainer c;
2250   PetscInt       m, n, M, N;
2251   PetscErrorCode ierr;
2252 
2253   PetscFunctionBegin;
2254   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ILU");
2255   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2256   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2257   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2258 
2259   process_group_type pg;
2260   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2261   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2262   lgraph_type& level_graph = *lgraph_p;
2263   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2264 
2265   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2266   ilu_permuted(level_graph);
2267 
2268   /* put together the new matrix */
2269   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2270   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2271   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2272   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2273   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2274   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2275   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2276   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2277 
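  /* stash the boost level graph in a container composed on the factor under the key "graph",
     so that MatSolve_MPIAIJ() below can retrieve it */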
2278   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2279   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2280   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2281   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2282   PetscFunctionReturn(0);
2283 }
2284 
2285 #undef __FUNCT__
2286 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2287 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2288 {
2289   PetscFunctionBegin;
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 #undef __FUNCT__
2294 #define __FUNCT__ "MatSolve_MPIAIJ"
2295 /*
2296   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2297 */
2298 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2299 {
2300   namespace graph_dist = boost::graph::distributed;
2301 
2302   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2303   lgraph_type    *lgraph_p;
2304   PetscContainer c;
2305   PetscErrorCode ierr;
2306 
2307   PetscFunctionBegin;
2308   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2309   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2310   ierr = VecCopy(b, x);CHKERRQ(ierr);
2311 
2312   PetscScalar *array_x;
2313   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2314   PetscInt sx;
2315   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2316 
2317   PetscScalar *array_b;
2318   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2319   PetscInt sb;
2320   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2321 
2322   lgraph_type& level_graph = *lgraph_p;
2323   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2324 
2325   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2326   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2327   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2328 
2329   typedef boost::iterator_property_map<array_ref_type::iterator,
2330                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2331   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2332   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2333 
2334   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2335   PetscFunctionReturn(0);
2336 }
2337 #endif
2338 
2339 #undef __FUNCT__
2340 #define __FUNCT__ "MatDestroy_MatRedundant"
2341 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2342 {
2343   PetscErrorCode ierr;
2344   Mat_Redundant  *redund;
2345   PetscInt       i;
2346   PetscMPIInt    size;
2347 
2348   PetscFunctionBegin;
2349   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2350   if (size == 1) {
2351     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2352     redund = a->redundant;
2353   } else {
2354     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2355     redund = a->redundant;
2356   }
2357   if (redund) {
2358     if (redund->matseq) { /* via MatGetSubMatrices()  */
2359       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2360       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2361       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2362       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2363     } else {
2364       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2365       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2366       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2367       for (i=0; i<redund->nrecvs; i++) {
2368         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2369         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2370       }
2371       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2372     }
2373 
2374     if (redund->psubcomm) {
2375       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2376     }
2377     ierr = redund->Destroy(A);CHKERRQ(ierr);
2378     ierr = PetscFree(redund);CHKERRQ(ierr);
2379   }
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2385 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2386 {
2387   PetscMPIInt    rank,size;
2388   MPI_Comm       comm;
2389   PetscErrorCode ierr;
2390   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2391   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2392   PetscInt       *rowrange = mat->rmap->range;
2393   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2394   Mat            A = aij->A,B=aij->B,C=*matredundant;
2395   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2396   PetscScalar    *sbuf_a;
2397   PetscInt       nzlocal=a->nz+b->nz;
2398   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2399   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2400   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2401   MatScalar      *aworkA,*aworkB;
2402   PetscScalar    *vals;
2403   PetscMPIInt    tag1,tag2,tag3,imdex;
2404   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2405   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2406   MPI_Status     recv_status,*send_status;
2407   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2408   PetscInt       **rbuf_j=NULL;
2409   PetscScalar    **rbuf_a=NULL;
2410   Mat_Redundant  *redund =NULL;
2411 
2412   PetscFunctionBegin;
2413   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2414   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2415   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2416   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2417   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2418 
2419   if (reuse == MAT_REUSE_MATRIX) {
2420     if (M != C->rmap->N || N != C->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2421     if (subsize == 1) {
2422       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2423       redund = c->redundant;
2424     } else {
2425       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2426       redund = c->redundant;
2427     }
2428     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2429 
2430     nsends    = redund->nsends;
2431     nrecvs    = redund->nrecvs;
2432     send_rank = redund->send_rank;
2433     recv_rank = redund->recv_rank;
2434     sbuf_nz   = redund->sbuf_nz;
2435     rbuf_nz   = redund->rbuf_nz;
2436     sbuf_j    = redund->sbuf_j;
2437     sbuf_a    = redund->sbuf_a;
2438     rbuf_j    = redund->rbuf_j;
2439     rbuf_a    = redund->rbuf_a;
2440   }
2441 
2442   if (reuse == MAT_INITIAL_MATRIX) {
2443     PetscInt    nleftover,np_subcomm;
2444 
2445     /* determine the destination ranks (send_rank), nsends and nrecvs */
2446     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2447 
2448     np_subcomm = size/nsubcomm;
2449     nleftover  = size - nsubcomm*np_subcomm;
2450 
2451     /* the block of code below is specific to the INTERLACED layout */
2452     /* ------------------------------------------------*/
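    /* with the interlaced layout, the nsubcomm processes that share a subrank sit in different
       subcommunicators and each needs the others' local rows, so they exchange with one another;
       leftover processes (when size is not a multiple of nsubcomm) additionally send their rows
       to the last member of the full-sized subcommunicators */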
2453     nsends = 0; nrecvs = 0;
2454     for (i=0; i<size; i++) {
2455       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2456         send_rank[nsends++] = i;
2457         recv_rank[nrecvs++] = i;
2458       }
2459     }
2460     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2461       i = size-nleftover-1;
2462       j = 0;
2463       while (j < nsubcomm - nleftover) {
2464         send_rank[nsends++] = i;
2465         i--; j++;
2466       }
2467     }
2468 
2469     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2470       for (i=0; i<nleftover; i++) {
2471         recv_rank[nrecvs++] = size-nleftover+i;
2472       }
2473     }
2474     /*----------------------------------------------*/
2475 
2476     /* allocate sbuf_j, sbuf_a */
2477     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2478     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2479     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2480     /*
2481     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2482     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2483      */
2484   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2485 
2486   /* copy mat's local entries into the buffers */
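  /* sbuf_j holds the row pointers (rend-rstart+1 entries) followed by the global column indices
     of every local row, written in global column order: B entries left of the diagonal block,
     then A entries, then B entries to the right; sbuf_a holds the matching values */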
2487   if (reuse == MAT_INITIAL_MATRIX) {
2488     rownz_max = 0;
2489     rptr      = sbuf_j;
2490     cols      = sbuf_j + rend-rstart + 1;
2491     vals      = sbuf_a;
2492     rptr[0]   = 0;
2493     for (i=0; i<rend-rstart; i++) {
2494       row    = i + rstart;
2495       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2496       ncols  = nzA + nzB;
2497       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2498       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2499       /* load the column indices for this row into cols */
2500       lwrite = 0;
2501       for (l=0; l<nzB; l++) {
2502         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2503           vals[lwrite]   = aworkB[l];
2504           cols[lwrite++] = ctmp;
2505         }
2506       }
2507       for (l=0; l<nzA; l++) {
2508         vals[lwrite]   = aworkA[l];
2509         cols[lwrite++] = cstart + cworkA[l];
2510       }
2511       for (l=0; l<nzB; l++) {
2512         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2513           vals[lwrite]   = aworkB[l];
2514           cols[lwrite++] = ctmp;
2515         }
2516       }
2517       vals     += ncols;
2518       cols     += ncols;
2519       rptr[i+1] = rptr[i] + ncols;
2520       if (rownz_max < ncols) rownz_max = ncols;
2521     }
2522     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2523   } else { /* only copy matrix values into sbuf_a */
2524     rptr    = sbuf_j;
2525     vals    = sbuf_a;
2526     rptr[0] = 0;
2527     for (i=0; i<rend-rstart; i++) {
2528       row    = i + rstart;
2529       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2530       ncols  = nzA + nzB;
2531       cworkB = b->j + b->i[i];
2532       aworkA = a->a + a->i[i];
2533       aworkB = b->a + b->i[i];
2534       lwrite = 0;
2535       for (l=0; l<nzB; l++) {
2536         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2537       }
2538       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2539       for (l=0; l<nzB; l++) {
2540         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2541       }
2542       vals     += ncols;
2543       rptr[i+1] = rptr[i] + ncols;
2544     }
2545   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2546 
2547   /* send nzlocal to others, and recv others' nzlocal */
2548   /*--------------------------------------------------*/
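  /* three message tags are used: tag1 exchanges the nonzero counts, tag2 the row pointers and
     column indices (only needed for MAT_INITIAL_MATRIX), and tag3 the numerical values */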
2549   if (reuse == MAT_INITIAL_MATRIX) {
2550     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
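    /* all MPI requests live in one array: send requests for tags 3,2,1 followed by
       receive requests for tags 1,2,3 (the +1 avoids a zero-length allocation) */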
2551 
2552     s_waits2 = s_waits3 + nsends;
2553     s_waits1 = s_waits2 + nsends;
2554     r_waits1 = s_waits1 + nsends;
2555     r_waits2 = r_waits1 + nrecvs;
2556     r_waits3 = r_waits2 + nrecvs;
2557   } else {
2558     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2559 
2560     r_waits3 = s_waits3 + nsends;
2561   }
2562 
2563   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2564   if (reuse == MAT_INITIAL_MATRIX) {
2565     /* get new tags to keep the communication clean */
2566     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2567     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2568     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2569 
2570     /* post receives of other's nzlocal */
2571     for (i=0; i<nrecvs; i++) {
2572       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2573     }
2574     /* send nzlocal to others */
2575     for (i=0; i<nsends; i++) {
2576       sbuf_nz[i] = nzlocal;
2577       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2578     }
2579     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2580     count = nrecvs;
2581     while (count) {
2582       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2583 
2584       recv_rank[imdex] = recv_status.MPI_SOURCE;
2585       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2586       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2587 
2588       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2589 
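      /* the incoming j-buffer holds (rows+1) row pointers followed by that rank's column
         indices, so enlarge the expected length accordingly (with one spare entry) */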
2590       rbuf_nz[imdex] += i + 2;
2591 
2592       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2593       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2594       count--;
2595     }
2596     /* wait on sends of nzlocal */
2597     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2598     /* send mat->i,j to others, and recv from other's */
2599     /*------------------------------------------------*/
2600     for (i=0; i<nsends; i++) {
2601       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2602       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2603     }
2604     /* wait on receives of mat->i,j */
2605     /*------------------------------*/
2606     count = nrecvs;
2607     while (count) {
2608       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2609       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2610       count--;
2611     }
2612     /* wait on sends of mat->i,j */
2613     /*---------------------------*/
2614     if (nsends) {
2615       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2616     }
2617   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2618 
2619   /* post receives, send and receive mat->a */
2620   /*----------------------------------------*/
2621   for (imdex=0; imdex<nrecvs; imdex++) {
2622     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2623   }
2624   for (i=0; i<nsends; i++) {
2625     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2626   }
2627   count = nrecvs;
2628   while (count) {
2629     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2630     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2631     count--;
2632   }
2633   if (nsends) {
2634     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2635   }
2636 
2637   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2638 
2639   /* create redundant matrix */
2640   /*-------------------------*/
2641   if (reuse == MAT_INITIAL_MATRIX) {
2642     const PetscInt *range;
2643     PetscInt       rstart_sub,rend_sub,mloc_sub;
2644 
2645     /* compute rownz_max for preallocation */
2646     for (imdex=0; imdex<nrecvs; imdex++) {
2647       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2648       rptr = rbuf_j[imdex];
2649       for (i=0; i<j; i++) {
2650         ncols = rptr[i+1] - rptr[i];
2651         if (rownz_max < ncols) rownz_max = ncols;
2652       }
2653     }
2654 
2655     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2656 
2657     /* get local size of redundant matrix
2658        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2659     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2660     rstart_sub = range[nsubcomm*subrank];
2661     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2662       rend_sub = range[nsubcomm*(subrank+1)];
2663     } else {
2664       rend_sub = mat->rmap->N;
2665     }
2666     mloc_sub = rend_sub - rstart_sub;
2667 
2668     if (M == N) {
2669       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2670     } else { /* non-square matrix */
2671       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2672     }
2673     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2674     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2675     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2676     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2677   } else {
2678     C = *matredundant;
2679   }
2680 
2681   /* insert local matrix entries */
2682   rptr = sbuf_j;
2683   cols = sbuf_j + rend-rstart + 1;
2684   vals = sbuf_a;
2685   for (i=0; i<rend-rstart; i++) {
2686     row   = i + rstart;
2687     ncols = rptr[i+1] - rptr[i];
2688     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2689     vals += ncols;
2690     cols += ncols;
2691   }
2692   /* insert received matrix entries */
2693   for (imdex=0; imdex<nrecvs; imdex++) {
2694     rstart = rowrange[recv_rank[imdex]];
2695     rend   = rowrange[recv_rank[imdex]+1];
2696     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2697     rptr   = rbuf_j[imdex];
2698     cols   = rbuf_j[imdex] + rend-rstart + 1;
2699     vals   = rbuf_a[imdex];
2700     for (i=0; i<rend-rstart; i++) {
2701       row   = i + rstart;
2702       ncols = rptr[i+1] - rptr[i];
2703       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2704       vals += ncols;
2705       cols += ncols;
2706     }
2707   }
2708   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2709   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2710 
2711   if (reuse == MAT_INITIAL_MATRIX) {
2712     *matredundant = C;
2713 
2714     /* create a supporting struct and attach it to C for reuse */
2715     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2716     if (subsize == 1) {
2717       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2718       c->redundant = redund;
2719     } else {
2720       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2721       c->redundant = redund;
2722     }
2723 
2724     redund->nzlocal   = nzlocal;
2725     redund->nsends    = nsends;
2726     redund->nrecvs    = nrecvs;
2727     redund->send_rank = send_rank;
2728     redund->recv_rank = recv_rank;
2729     redund->sbuf_nz   = sbuf_nz;
2730     redund->rbuf_nz   = rbuf_nz;
2731     redund->sbuf_j    = sbuf_j;
2732     redund->sbuf_a    = sbuf_a;
2733     redund->rbuf_j    = rbuf_j;
2734     redund->rbuf_a    = rbuf_a;
2735     redund->psubcomm  = NULL;
2736 
2737     redund->Destroy = C->ops->destroy;
2738     C->ops->destroy = MatDestroy_MatRedundant;
2739   }
2740   PetscFunctionReturn(0);
2741 }
2742 
2743 #undef __FUNCT__
2744 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2745 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2746 {
2747   PetscErrorCode ierr;
2748   MPI_Comm       comm;
2749   PetscMPIInt    size,subsize;
2750   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2751   Mat_Redundant  *redund=NULL;
2752   PetscSubcomm   psubcomm=NULL;
2753   MPI_Comm       subcomm_in=subcomm;
2754   Mat            *matseq;
2755   IS             isrow,iscol;
2756 
2757   PetscFunctionBegin;
2758   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2759     if (reuse ==  MAT_INITIAL_MATRIX) {
2760       /* create psubcomm, then get subcomm */
2761       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2762       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2763       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2764 
2765       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2766       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2767       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2768       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2769       subcomm = psubcomm->comm;
2770     } else { /* retrieve psubcomm and subcomm */
2771       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2772       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2773       if (subsize == 1) {
2774         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2775         redund = c->redundant;
2776       } else {
2777         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2778         redund = c->redundant;
2779       }
2780       psubcomm = redund->psubcomm;
2781     }
2782     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2783       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2784       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2785         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2786         if (subsize == 1) {
2787           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2788           c->redundant->psubcomm = psubcomm;
2789         } else {
2790           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2791           c->redundant->psubcomm = psubcomm;
2792         }
2793       }
2794       PetscFunctionReturn(0);
2795     }
2796   }
2797 
2798   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2799   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2800   if (reuse == MAT_INITIAL_MATRIX) {
2801     /* create a local sequential matrix matseq[0] */
2802     mloc_sub = PETSC_DECIDE;
2803     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2804     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2805     rstart = rend - mloc_sub;
2806     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2807     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2808   } else { /* reuse == MAT_REUSE_MATRIX */
2809     if (subsize == 1) {
2810       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2811       redund = c->redundant;
2812     } else {
2813       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2814       redund = c->redundant;
2815     }
2816 
2817     isrow  = redund->isrow;
2818     iscol  = redund->iscol;
2819     matseq = redund->matseq;
2820   }
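  /* extract this process's share of the rows (all columns) as a sequential matrix,
     then concatenate the pieces across subcomm into the redundant matrix */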
2821   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2822   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2823 
2824   if (reuse == MAT_INITIAL_MATRIX) {
2825     /* create a supporting struct and attach it to C for reuse */
2826     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2827     if (subsize == 1) {
2828       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2829       c->redundant = redund;
2830     } else {
2831       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2832       c->redundant = redund;
2833     }
2834     redund->isrow    = isrow;
2835     redund->iscol    = iscol;
2836     redund->matseq   = matseq;
2837     redund->psubcomm = psubcomm;
2838     redund->Destroy               = (*matredundant)->ops->destroy;
2839     (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2840   }
2841   PetscFunctionReturn(0);
2842 }
2843 
2844 #undef __FUNCT__
2845 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2846 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2847 {
2848   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2849   PetscErrorCode ierr;
2850   PetscInt       i,*idxb = 0;
2851   PetscScalar    *va,*vb;
2852   Vec            vtmp;
2853 
2854   PetscFunctionBegin;
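  /* take the row-wise maxima of the diagonal block first, then merge in the off-diagonal
     block, mapping its local column indices back to global ones through garray */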
2855   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2856   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2857   if (idx) {
2858     for (i=0; i<A->rmap->n; i++) {
2859       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2860     }
2861   }
2862 
2863   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2864   if (idx) {
2865     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2866   }
2867   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2868   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2869 
2870   for (i=0; i<A->rmap->n; i++) {
2871     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2872       va[i] = vb[i];
2873       if (idx) idx[i] = a->garray[idxb[i]];
2874     }
2875   }
2876 
2877   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2878   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2879   ierr = PetscFree(idxb);CHKERRQ(ierr);
2880   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2881   PetscFunctionReturn(0);
2882 }
2883 
2884 #undef __FUNCT__
2885 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2886 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2887 {
2888   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2889   PetscErrorCode ierr;
2890   PetscInt       i,*idxb = 0;
2891   PetscScalar    *va,*vb;
2892   Vec            vtmp;
2893 
2894   PetscFunctionBegin;
2895   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2896   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2897   if (idx) {
2898     for (i=0; i<A->rmap->n; i++) {
2899       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2900     }
2901   }
2902 
2903   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2904   if (idx) {
2905     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2906   }
2907   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2908   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2909 
2910   for (i=0; i<A->rmap->n; i++) {
2911     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2912       va[i] = vb[i];
2913       if (idx) idx[i] = a->garray[idxb[i]];
2914     }
2915   }
2916 
2917   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2918   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2919   ierr = PetscFree(idxb);CHKERRQ(ierr);
2920   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2921   PetscFunctionReturn(0);
2922 }
2923 
2924 #undef __FUNCT__
2925 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2926 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2927 {
2928   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2929   PetscInt       n      = A->rmap->n;
2930   PetscInt       cstart = A->cmap->rstart;
2931   PetscInt       *cmap  = mat->garray;
2932   PetscInt       *diagIdx, *offdiagIdx;
2933   Vec            diagV, offdiagV;
2934   PetscScalar    *a, *diagA, *offdiagA;
2935   PetscInt       r;
2936   PetscErrorCode ierr;
2937 
2938   PetscFunctionBegin;
2939   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2940   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2941   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2942   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2943   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2944   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2945   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2946   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2947   for (r = 0; r < n; ++r) {
2948     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2949       a[r]   = diagA[r];
2950       idx[r] = cstart + diagIdx[r];
2951     } else {
2952       a[r]   = offdiagA[r];
2953       idx[r] = cmap[offdiagIdx[r]];
2954     }
2955   }
2956   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2957   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2958   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2959   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2960   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2961   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2962   PetscFunctionReturn(0);
2963 }
2964 
2965 #undef __FUNCT__
2966 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2967 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2968 {
2969   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2970   PetscInt       n      = A->rmap->n;
2971   PetscInt       cstart = A->cmap->rstart;
2972   PetscInt       *cmap  = mat->garray;
2973   PetscInt       *diagIdx, *offdiagIdx;
2974   Vec            diagV, offdiagV;
2975   PetscScalar    *a, *diagA, *offdiagA;
2976   PetscInt       r;
2977   PetscErrorCode ierr;
2978 
2979   PetscFunctionBegin;
2980   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2981   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2982   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2983   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2984   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2985   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2986   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2987   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2988   for (r = 0; r < n; ++r) {
2989     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2990       a[r]   = diagA[r];
2991       idx[r] = cstart + diagIdx[r];
2992     } else {
2993       a[r]   = offdiagA[r];
2994       idx[r] = cmap[offdiagIdx[r]];
2995     }
2996   }
2997   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2998   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2999   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3000   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3001   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3002   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3003   PetscFunctionReturn(0);
3004 }
3005 
3006 #undef __FUNCT__
3007 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3008 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3009 {
3010   PetscErrorCode ierr;
3011   Mat            *dummy;
3012 
3013   PetscFunctionBegin;
3014   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3015   *newmat = *dummy;
3016   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3017   PetscFunctionReturn(0);
3018 }
3019 
3020 #undef __FUNCT__
3021 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3022 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3023 {
3024   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3025   PetscErrorCode ierr;
3026 
3027   PetscFunctionBegin;
3028   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3029   PetscFunctionReturn(0);
3030 }
3031 
3032 #undef __FUNCT__
3033 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3034 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3035 {
3036   PetscErrorCode ierr;
3037   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3038 
3039   PetscFunctionBegin;
3040   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3041   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3042   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3043   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3044   PetscFunctionReturn(0);
3045 }
3046 
3047 /* -------------------------------------------------------------------*/
3048 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3049                                        MatGetRow_MPIAIJ,
3050                                        MatRestoreRow_MPIAIJ,
3051                                        MatMult_MPIAIJ,
3052                                 /* 4*/ MatMultAdd_MPIAIJ,
3053                                        MatMultTranspose_MPIAIJ,
3054                                        MatMultTransposeAdd_MPIAIJ,
3055 #if defined(PETSC_HAVE_PBGL)
3056                                        MatSolve_MPIAIJ,
3057 #else
3058                                        0,
3059 #endif
3060                                        0,
3061                                        0,
3062                                 /*10*/ 0,
3063                                        0,
3064                                        0,
3065                                        MatSOR_MPIAIJ,
3066                                        MatTranspose_MPIAIJ,
3067                                 /*15*/ MatGetInfo_MPIAIJ,
3068                                        MatEqual_MPIAIJ,
3069                                        MatGetDiagonal_MPIAIJ,
3070                                        MatDiagonalScale_MPIAIJ,
3071                                        MatNorm_MPIAIJ,
3072                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3073                                        MatAssemblyEnd_MPIAIJ,
3074                                        MatSetOption_MPIAIJ,
3075                                        MatZeroEntries_MPIAIJ,
3076                                 /*24*/ MatZeroRows_MPIAIJ,
3077                                        0,
3078 #if defined(PETSC_HAVE_PBGL)
3079                                        0,
3080 #else
3081                                        0,
3082 #endif
3083                                        0,
3084                                        0,
3085                                 /*29*/ MatSetUp_MPIAIJ,
3086 #if defined(PETSC_HAVE_PBGL)
3087                                        0,
3088 #else
3089                                        0,
3090 #endif
3091                                        0,
3092                                        0,
3093                                        0,
3094                                 /*34*/ MatDuplicate_MPIAIJ,
3095                                        0,
3096                                        0,
3097                                        0,
3098                                        0,
3099                                 /*39*/ MatAXPY_MPIAIJ,
3100                                        MatGetSubMatrices_MPIAIJ,
3101                                        MatIncreaseOverlap_MPIAIJ,
3102                                        MatGetValues_MPIAIJ,
3103                                        MatCopy_MPIAIJ,
3104                                 /*44*/ MatGetRowMax_MPIAIJ,
3105                                        MatScale_MPIAIJ,
3106                                        0,
3107                                        0,
3108                                        MatZeroRowsColumns_MPIAIJ,
3109                                 /*49*/ MatSetRandom_MPIAIJ,
3110                                        0,
3111                                        0,
3112                                        0,
3113                                        0,
3114                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3115                                        0,
3116                                        MatSetUnfactored_MPIAIJ,
3117                                        MatPermute_MPIAIJ,
3118                                        0,
3119                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3120                                        MatDestroy_MPIAIJ,
3121                                        MatView_MPIAIJ,
3122                                        0,
3123                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3124                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3125                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3126                                        0,
3127                                        0,
3128                                        0,
3129                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3130                                        MatGetRowMinAbs_MPIAIJ,
3131                                        0,
3132                                        MatSetColoring_MPIAIJ,
3133                                        0,
3134                                        MatSetValuesAdifor_MPIAIJ,
3135                                 /*75*/ MatFDColoringApply_AIJ,
3136                                        0,
3137                                        0,
3138                                        0,
3139                                        MatFindZeroDiagonals_MPIAIJ,
3140                                 /*80*/ 0,
3141                                        0,
3142                                        0,
3143                                 /*83*/ MatLoad_MPIAIJ,
3144                                        0,
3145                                        0,
3146                                        0,
3147                                        0,
3148                                        0,
3149                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3150                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3151                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3152                                        MatPtAP_MPIAIJ_MPIAIJ,
3153                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3154                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3155                                        0,
3156                                        0,
3157                                        0,
3158                                        0,
3159                                 /*99*/ 0,
3160                                        0,
3161                                        0,
3162                                        MatConjugate_MPIAIJ,
3163                                        0,
3164                                 /*104*/MatSetValuesRow_MPIAIJ,
3165                                        MatRealPart_MPIAIJ,
3166                                        MatImaginaryPart_MPIAIJ,
3167                                        0,
3168                                        0,
3169                                 /*109*/0,
3170                                        MatGetRedundantMatrix_MPIAIJ,
3171                                        MatGetRowMin_MPIAIJ,
3172                                        0,
3173                                        0,
3174                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3175                                        0,
3176                                        0,
3177                                        0,
3178                                        0,
3179                                 /*119*/0,
3180                                        0,
3181                                        0,
3182                                        0,
3183                                        MatGetMultiProcBlock_MPIAIJ,
3184                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3185                                        MatGetColumnNorms_MPIAIJ,
3186                                        MatInvertBlockDiagonal_MPIAIJ,
3187                                        0,
3188                                        MatGetSubMatricesParallel_MPIAIJ,
3189                                 /*129*/0,
3190                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3191                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3192                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3193                                        0,
3194                                 /*134*/0,
3195                                        0,
3196                                        0,
3197                                        0,
3198                                        0,
3199                                 /*139*/0,
3200                                        0,
3201                                        0,
3202                                        MatFDColoringSetUp_MPIXAIJ
3203 };
3204 
3205 /* ----------------------------------------------------------------------------------------*/
3206 
3207 #undef __FUNCT__
3208 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3209 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3210 {
3211   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3212   PetscErrorCode ierr;
3213 
3214   PetscFunctionBegin;
3215   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3216   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3217   PetscFunctionReturn(0);
3218 }
3219 
3220 #undef __FUNCT__
3221 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3222 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3223 {
3224   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3225   PetscErrorCode ierr;
3226 
3227   PetscFunctionBegin;
3228   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3229   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3230   PetscFunctionReturn(0);
3231 }
3232 
3233 #undef __FUNCT__
3234 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3235 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3236 {
3237   Mat_MPIAIJ     *b;
3238   PetscErrorCode ierr;
3239 
3240   PetscFunctionBegin;
3241   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3242   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3243   b = (Mat_MPIAIJ*)B->data;
3244 
3245   if (!B->preallocated) {
3246     /* Explicitly create 2 MATSEQAIJ matrices. */
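    /* b->A holds the diagonal block (local rows by local columns); b->B holds the
       off-diagonal block, created here with the full global column dimension and
       compacted to the columns actually used when the matrix is assembled. */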
3247     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3248     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3249     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3250     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3251     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3252     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3253     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3254     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3255     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3256     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3257   }
3258 
3259   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3260   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3261   B->preallocated = PETSC_TRUE;
3262   PetscFunctionReturn(0);
3263 }
3264 
3265 #undef __FUNCT__
3266 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3267 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3268 {
3269   Mat            mat;
3270   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3271   PetscErrorCode ierr;
3272 
3273   PetscFunctionBegin;
3274   *newmat = 0;
3275   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3276   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3277   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3278   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3279   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3280   a       = (Mat_MPIAIJ*)mat->data;
3281 
3282   mat->factortype   = matin->factortype;
3283   mat->rmap->bs     = matin->rmap->bs;
3284   mat->cmap->bs     = matin->cmap->bs;
3285   mat->assembled    = PETSC_TRUE;
3286   mat->insertmode   = NOT_SET_VALUES;
3287   mat->preallocated = PETSC_TRUE;
3288 
3289   a->size         = oldmat->size;
3290   a->rank         = oldmat->rank;
3291   a->donotstash   = oldmat->donotstash;
3292   a->roworiented  = oldmat->roworiented;
3293   a->rowindices   = 0;
3294   a->rowvalues    = 0;
3295   a->getrowactive = PETSC_FALSE;
3296 
3297   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3298   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3299 
3300   if (oldmat->colmap) {
3301 #if defined(PETSC_USE_CTABLE)
3302     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3303 #else
3304     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3305     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3306     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3307 #endif
3308   } else a->colmap = 0;
3309   if (oldmat->garray) {
3310     PetscInt len;
3311     len  = oldmat->B->cmap->n;
3312     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3313     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3314     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3315   } else a->garray = 0;
3316 
3317   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3318   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3319   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3320   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3321   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3322   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3323   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3324   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3325   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3326   *newmat = mat;
3327   PetscFunctionReturn(0);
3328 }
3329 
3330 
3331 
3332 #undef __FUNCT__
3333 #define __FUNCT__ "MatLoad_MPIAIJ"
3334 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3335 {
3336   PetscScalar    *vals,*svals;
3337   MPI_Comm       comm;
3338   PetscErrorCode ierr;
3339   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3340   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3341   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3342   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3343   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3344   int            fd;
3345   PetscInt       bs = 1;
3346 
3347   PetscFunctionBegin;
3348   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3349   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3350   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3351   if (!rank) {
3352     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3353     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3354     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3355   }
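  /* only process 0 reads the binary file; it broadcasts the header and then ships each
     process its row lengths, column indices, and values below */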
3356 
3357   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3358   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3359   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3360 
3361   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3362 
3363   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3364   M    = header[1]; N = header[2];
3365   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3366   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3367   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3368 
3369   /* If global sizes are set, check if they are consistent with that given in the file */
3370   if (sizesset) {
3371     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3372   }
3373   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3374   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3375 
3376   /* determine ownership of all (block) rows */
3377   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3378   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3379   else m = newMat->rmap->n; /* Set by user */
3380 
3381   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3382   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3383 
3384   /* First process needs enough room for process with most rows */
3385   if (!rank) {
3386     mmax = rowners[1];
3387     for (i=2; i<=size; i++) {
3388       mmax = PetscMax(mmax, rowners[i]);
3389     }
3390   } else mmax = -1;             /* unused, but compilers complain */
3391 
3392   rowners[0] = 0;
3393   for (i=2; i<=size; i++) {
3394     rowners[i] += rowners[i-1];
3395   }
3396   rstart = rowners[rank];
3397   rend   = rowners[rank+1];
3398 
3399   /* distribute row lengths to all processors */
3400   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3401   if (!rank) {
3402     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3403     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3404     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3405     for (j=0; j<m; j++) {
3406       procsnz[0] += ourlens[j];
3407     }
3408     for (i=1; i<size; i++) {
3409       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3410       /* calculate the number of nonzeros on each processor */
3411       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3412         procsnz[i] += rowlengths[j];
3413       }
3414       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3415     }
3416     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3417   } else {
3418     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3419   }
3420 
3421   if (!rank) {
3422     /* determine max buffer needed and allocate it */
3423     maxnz = 0;
3424     for (i=0; i<size; i++) {
3425       maxnz = PetscMax(maxnz,procsnz[i]);
3426     }
3427     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3428 
3429     /* read in my part of the matrix column indices  */
3430     nz   = procsnz[0];
3431     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3432     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3433 
3434     /* read in everyone else's and ship off */
3435     for (i=1; i<size; i++) {
3436       nz   = procsnz[i];
3437       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3438       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3439     }
3440     ierr = PetscFree(cols);CHKERRQ(ierr);
3441   } else {
3442     /* determine buffer space needed for message */
3443     nz = 0;
3444     for (i=0; i<m; i++) {
3445       nz += ourlens[i];
3446     }
3447     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3448 
3449     /* receive message of column indices */
3450     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3451   }
3452 
3453   /* determine column ownership if matrix is not square */
3454   if (N != M) {
3455     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3456     else n = newMat->cmap->n;
3457     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3458     cstart = cend - n;
3459   } else {
3460     cstart = rstart;
3461     cend   = rend;
3462     n      = cend - cstart;
3463   }
3464 
3465   /* loop over local rows, determining number of off diagonal entries */
3466   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3467   jj   = 0;
3468   for (i=0; i<m; i++) {
3469     for (j=0; j<ourlens[i]; j++) {
3470       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3471       jj++;
3472     }
3473   }
3474 
3475   for (i=0; i<m; i++) {
3476     ourlens[i] -= offlens[i];
3477   }
3478   if (!sizesset) {
3479     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3480   }
3481 
3482   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3483 
3484   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3485 
3486   for (i=0; i<m; i++) {
3487     ourlens[i] += offlens[i];
3488   }
3489 
3490   if (!rank) {
3491     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3492 
3493     /* read in my part of the matrix numerical values  */
3494     nz   = procsnz[0];
3495     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3496 
3497     /* insert into matrix */
3498     jj      = rstart;
3499     smycols = mycols;
3500     svals   = vals;
3501     for (i=0; i<m; i++) {
3502       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3503       smycols += ourlens[i];
3504       svals   += ourlens[i];
3505       jj++;
3506     }
3507 
3508     /* read in other processors and ship out */
3509     for (i=1; i<size; i++) {
3510       nz   = procsnz[i];
3511       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3512       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3513     }
3514     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3515   } else {
3516     /* receive numeric values */
3517     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3518 
3519     /* receive message of values */
3520     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3521 
3522     /* insert into matrix */
3523     jj      = rstart;
3524     smycols = mycols;
3525     svals   = vals;
3526     for (i=0; i<m; i++) {
3527       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3528       smycols += ourlens[i];
3529       svals   += ourlens[i];
3530       jj++;
3531     }
3532   }
3533   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3534   ierr = PetscFree(vals);CHKERRQ(ierr);
3535   ierr = PetscFree(mycols);CHKERRQ(ierr);
3536   ierr = PetscFree(rowners);CHKERRQ(ierr);
3537   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3538   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3539   PetscFunctionReturn(0);
3540 }
3541 
3542 #undef __FUNCT__
3543 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3544 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3545 {
3546   PetscErrorCode ierr;
3547   IS             iscol_local;
3548   PetscInt       csize;
3549 
3550   PetscFunctionBegin;
3551   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3552   if (call == MAT_REUSE_MATRIX) {
3553     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3554     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3555   } else {
3556     PetscInt cbs;
3557     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3558     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3559     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3560   }
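  /* the private routine below needs every process to know the full (sequential) set of
     requested columns, hence the ISAllGather() above */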
3561   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3562   if (call == MAT_INITIAL_MATRIX) {
3563     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3564     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3565   }
3566   PetscFunctionReturn(0);
3567 }
3568 
3569 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3570 #undef __FUNCT__
3571 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3572 /*
3573     Not great since it makes two copies of the submatrix, first a SeqAIJ
3574   on each process and then the final matrix by concatenating the local pieces.
3575   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3576 
3577   Note: This requires a sequential iscol with all indices.
3578 */
3579 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3580 {
3581   PetscErrorCode ierr;
3582   PetscMPIInt    rank,size;
3583   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3584   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3585   PetscBool      allcolumns, colflag;
3586   Mat            M,Mreuse;
3587   MatScalar      *vwork,*aa;
3588   MPI_Comm       comm;
3589   Mat_SeqAIJ     *aij;
3590 
3591   PetscFunctionBegin;
3592   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3593   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3594   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3595 
3596   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3597   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3598   if (colflag && ncol == mat->cmap->N) {
3599     allcolumns = PETSC_TRUE;
3600   } else {
3601     allcolumns = PETSC_FALSE;
3602   }
3603   if (call ==  MAT_REUSE_MATRIX) {
3604     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3605     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3606     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3607   } else {
3608     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3609   }
3610 
3611   /*
3612       m - number of local rows
3613       n - number of columns (same on all processors)
3614       rstart - first row in new global matrix generated
3615   */
3616   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3617   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3618   if (call == MAT_INITIAL_MATRIX) {
3619     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3620     ii  = aij->i;
3621     jj  = aij->j;
3622 
3623     /*
3624         Determine the number of non-zeros in the diagonal and off-diagonal
3625         portions of the matrix in order to do correct preallocation
3626     */
3627 
3628     /* first get start and end of "diagonal" columns */
3629     if (csize == PETSC_DECIDE) {
3630       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3631       if (mglobal == n) { /* square matrix */
3632         nlocal = m;
3633       } else {
3634         nlocal = n/size + ((n % size) > rank);
3635       }
3636     } else {
3637       nlocal = csize;
3638     }
3639     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3640     rstart = rend - nlocal;
3641     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3642 
3643     /* next, compute all the lengths */
3644     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3645     olens = dlens + m;
3646     for (i=0; i<m; i++) {
3647       jend = ii[i+1] - ii[i];
3648       olen = 0;
3649       dlen = 0;
3650       for (j=0; j<jend; j++) {
3651         if (*jj < rstart || *jj >= rend) olen++;
3652         else dlen++;
3653         jj++;
3654       }
3655       olens[i] = olen;
3656       dlens[i] = dlen;
3657     }
3658     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3659     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3660     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3661     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3662     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3663     ierr = PetscFree(dlens);CHKERRQ(ierr);
3664   } else {
3665     PetscInt ml,nl;
3666 
3667     M    = *newmat;
3668     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3669     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3670     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3671     /*
3672          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3673        rather than the slower MatSetValues().
3674     */
3675     M->was_assembled = PETSC_TRUE;
3676     M->assembled     = PETSC_FALSE;
3677   }
3678   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3679   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3680   ii   = aij->i;
3681   jj   = aij->j;
3682   aa   = aij->a;
3683   for (i=0; i<m; i++) {
3684     row   = rstart + i;
3685     nz    = ii[i+1] - ii[i];
3686     cwork = jj;     jj += nz;
3687     vwork = aa;     aa += nz;
3688     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3689   }
3690 
3691   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3692   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3693   *newmat = M;
3694 
3695   /* save submatrix used in processor for next request */
3696   if (call ==  MAT_INITIAL_MATRIX) {
3697     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3698     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3699   }
3700   PetscFunctionReturn(0);
3701 }
3702 
3703 #undef __FUNCT__
3704 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3705 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3706 {
3707   PetscInt       m,cstart, cend,j,nnz,i,d;
3708   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3709   const PetscInt *JJ;
3710   PetscScalar    *values;
3711   PetscErrorCode ierr;
3712 
3713   PetscFunctionBegin;
3714   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3715 
3716   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3717   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3718   m      = B->rmap->n;
3719   cstart = B->cmap->rstart;
3720   cend   = B->cmap->rend;
3721   rstart = B->rmap->rstart;
3722 
3723   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3724 
3725 #if defined(PETSC_USE_DEBUG)
3726   for (i=0; i<m; i++) {
3727     nnz = Ii[i+1]- Ii[i];
3728     JJ  = J + Ii[i];
3729     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3730     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3731     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3732   }
3733 #endif
3734 
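  /* count, for each local row, how many entries fall in the diagonal column range
     [cstart,cend); the remainder belong to the off-diagonal block */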
3735   for (i=0; i<m; i++) {
3736     nnz     = Ii[i+1]- Ii[i];
3737     JJ      = J + Ii[i];
3738     nnz_max = PetscMax(nnz_max,nnz);
3739     d       = 0;
3740     for (j=0; j<nnz; j++) {
3741       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3742     }
3743     d_nnz[i] = d;
3744     o_nnz[i] = nnz - d;
3745   }
3746   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3747   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3748 
3749   if (v) values = (PetscScalar*)v;
3750   else {
3751     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3752   }
3753 
3754   for (i=0; i<m; i++) {
3755     ii   = i + rstart;
3756     nnz  = Ii[i+1]- Ii[i];
3757     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3758   }
3759   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3760   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3761 
3762   if (!v) {
3763     ierr = PetscFree(values);CHKERRQ(ierr);
3764   }
3765   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3766   PetscFunctionReturn(0);
3767 }
3768 
3769 #undef __FUNCT__
3770 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3771 /*@
3772    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3773    (the default parallel PETSc format).
3774 
3775    Collective on MPI_Comm
3776 
3777    Input Parameters:
3778 +  B - the matrix
3779 .  i - the indices into j for the start of each local row (starts with zero)
3780 .  j - the column indices for each local row (starts with zero)
3781 -  v - optional values in the matrix
3782 
3783    Level: developer
3784 
3785    Notes:
3786        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3787      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3788      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3789 
3790        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3791        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3792        The format which is used for the sparse matrix input is equivalent to a
3793     row-major ordering, i.e. for the following matrix, the input data expected is
3794     as shown:
3795 
3796         1 0 0
3797         2 0 3     P0
3798        -------
3799         4 5 6     P1
3800 
3801      Process0 [P0]: rows_owned=[0,1]
3802         i =  {0,1,3}  [size = nrow+1  = 2+1]
3803         j =  {0,0,2}  [size = nz = 3]
3804         v =  {1,2,3}  [size = nz = 3]
3805 
3806      Process1 [P1]: rows_owned=[2]
3807         i =  {0,3}    [size = nrow+1  = 1+1]
3808         j =  {0,1,2}  [size = nz = 3]
3809         v =  {4,5,6}  [size = nz = 3]
3810 
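     A minimal calling sketch for process 0 above (assuming B already has its local and
     global sizes set and has type MATMPIAIJ or MATAIJ):

.vb
        PetscInt    i[] = {0,1,3};
        PetscInt    j[] = {0,0,2};
        PetscScalar v[] = {1,2,3};

        ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
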
3811 .keywords: matrix, aij, compressed row, sparse, parallel
3812 
3813 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3814           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3815 @*/
3816 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3817 {
3818   PetscErrorCode ierr;
3819 
3820   PetscFunctionBegin;
3821   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3822   PetscFunctionReturn(0);
3823 }
3824 
3825 #undef __FUNCT__
3826 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3827 /*@C
3828    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3829    (the default parallel PETSc format).  For good matrix assembly performance
3830    the user should preallocate the matrix storage by setting the parameters
3831    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3832    performance can be increased by more than a factor of 50.
3833 
3834    Collective on MPI_Comm
3835 
3836    Input Parameters:
3837 +  A - the matrix
3838 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3839            (same value is used for all local rows)
3840 .  d_nnz - array containing the number of nonzeros in the various rows of the
3841            DIAGONAL portion of the local submatrix (possibly different for each row)
3842            or NULL, if d_nz is used to specify the nonzero structure.
3843            The size of this array is equal to the number of local rows, i.e. 'm'.
3844            For matrices that will be factored, you must leave room for (and set)
3845            the diagonal entry even if it is zero.
3846 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3847            submatrix (same value is used for all local rows).
3848 -  o_nnz - array containing the number of nonzeros in the various rows of the
3849            OFF-DIAGONAL portion of the local submatrix (possibly different for
3850            each row) or NULL, if o_nz is used to specify the nonzero
3851            structure. The size of this array is equal to the number
3852            of local rows, i.e. 'm'.
3853 
3854    If the *_nnz parameter is given then the *_nz parameter is ignored
3855 
3856    The AIJ format (also called the Yale sparse matrix format or
3857    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3858    storage.  The stored row and column indices begin with zero.
3859    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3860 
3861    The parallel matrix is partitioned such that the first m0 rows belong to
3862    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3863    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3864 
3865    The DIAGONAL portion of the local submatrix of a processor can be defined
3866    as the submatrix obtained by extracting the part corresponding to
3867    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3868    first row that belongs to the processor, r2 is the last row belonging to
3869    this processor, and c1-c2 is the range of indices of the local part of a
3870    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3871    common case of a square matrix, the row and column ranges are the same and
3872    the DIAGONAL part is also square. The remaining portion of the local
3873    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3874 
3875    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3876 
3877    You can call MatGetInfo() to get information on how effective the preallocation was;
3878    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3879    You can also run with the option -info and look for messages with the string
3880    malloc in them to see if additional memory allocation was needed.
3881 
3882    Example usage:
3883 
3884    Consider the following 8x8 matrix with 34 nonzero values that is
3885    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3886    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3887    as follows:
3888 
3889 .vb
3890             1  2  0  |  0  3  0  |  0  4
3891     Proc0   0  5  6  |  7  0  0  |  8  0
3892             9  0 10  | 11  0  0  | 12  0
3893     -------------------------------------
3894            13  0 14  | 15 16 17  |  0  0
3895     Proc1   0 18  0  | 19 20 21  |  0  0
3896             0  0  0  | 22 23  0  | 24  0
3897     -------------------------------------
3898     Proc2  25 26 27  |  0  0 28  | 29  0
3899            30  0  0  | 31 32 33  |  0 34
3900 .ve
3901 
3902    This can be represented as a collection of submatrices as:
3903 
3904 .vb
3905       A B C
3906       D E F
3907       G H I
3908 .ve
3909 
3910    Where the submatrices A,B,C are owned by proc0, D,E,F are
3911    owned by proc1, G,H,I are owned by proc2.
3912 
3913    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3914    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3915    The 'M','N' parameters are 8,8, and have the same values on all procs.
3916 
3917    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3918    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3919    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3920    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3921    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3922    matrix and [DF] as another SeqAIJ matrix.
3923 
3924    When d_nz, o_nz parameters are specified, d_nz storage elements are
3925    allocated for every row of the local diagonal submatrix, and o_nz
3926    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3927    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3928    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3929    In this case, the values of d_nz,o_nz are:
3930 .vb
3931      proc0 : d_nz = 2, o_nz = 2
3932      proc1 : d_nz = 3, o_nz = 2
3933      proc2 : d_nz = 1, o_nz = 4
3934 .ve
3935    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3936    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3937    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3938    34 values.
3939 
3940    When d_nnz, o_nnz parameters are specified, the storage is specified
3941    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3942    In the above case the values for d_nnz,o_nnz are:
3943 .vb
3944      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3945      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3946      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3947 .ve
3948    Here the space allocated is the sum of all the above values, i.e., 34, and
3949    hence the preallocation is perfect.
3950 
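   As a minimal sketch, proc1 in the example above could preallocate with the per-row arrays as
   follows (the other processes make the analogous call with their own arrays; the variable names
   are illustrative):
.vb
      PetscInt d_nnz[] = {3,3,2};
      PetscInt o_nnz[] = {2,1,1};

      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   The d_nz and o_nz arguments (here 0) are ignored because the arrays are provided.
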
3951    Level: intermediate
3952 
3953 .keywords: matrix, aij, compressed row, sparse, parallel
3954 
3955 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3956           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3957 @*/
3958 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3959 {
3960   PetscErrorCode ierr;
3961 
3962   PetscFunctionBegin;
3963   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3964   PetscValidType(B,1);
3965   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3966   PetscFunctionReturn(0);
3967 }
3968 
3969 #undef __FUNCT__
3970 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3971 /*@
3972      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3973          in standard CSR format.
3974 
3975    Collective on MPI_Comm
3976 
3977    Input Parameters:
3978 +  comm - MPI communicator
3979 .  m - number of local rows (Cannot be PETSC_DECIDE)
3980 .  n - This value should be the same as the local size used in creating the
3981        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3982        calculated if N is given). For square matrices n is almost always m.
3983 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3984 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3985 .   i - row indices
3986 .   j - column indices
3987 -   a - matrix values
3988 
3989    Output Parameter:
3990 .   mat - the matrix
3991 
3992    Level: intermediate
3993 
3994    Notes:
3995        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3996      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3997      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3998 
3999        The i and j indices are 0 based; the entries of i are offsets into the local j (and v) arrays.
4000 
4001        The format used for the sparse matrix input is equivalent to a
4002     row-major ordering, i.e., for the following matrix the expected input data is
4003     as shown:
4004 
4005         1 0 0
4006         2 0 3     P0
4007        -------
4008         4 5 6     P1
4009 
4010      Process0 [P0]: rows_owned=[0,1]
4011         i =  {0,1,3}  [size = nrow+1  = 2+1]
4012         j =  {0,0,2}  [size = nz = 3]
4013         v =  {1,2,3}  [size = nz = 3]
4014 
4015      Process1 [P1]: rows_owned=[2]
4016         i =  {0,3}    [size = nrow+1  = 1+1]
4017         j =  {0,1,2}  [size = nz = 3]
4018         v =  {4,5,6}  [size = nz = 3]
4019 
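   As a minimal sketch, P0 in the example above could create the matrix as follows (a two-process
   communicator is assumed, here written as PETSC_COMM_WORLD for illustration; P1 makes the
   corresponding call with m = 1 and its own i, j, v arrays):
.vb
      PetscInt    i[] = {0,1,3};
      PetscInt    j[] = {0,0,2};
      PetscScalar v[] = {1.0,2.0,3.0};
      Mat         A;

      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve
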
4020 .keywords: matrix, aij, compressed row, sparse, parallel
4021 
4022 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4023           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4024 @*/
4025 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4026 {
4027   PetscErrorCode ierr;
4028 
4029   PetscFunctionBegin;
4030   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4031   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4032   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4033   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4034   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4035   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4036   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4037   PetscFunctionReturn(0);
4038 }
4039 
4040 #undef __FUNCT__
4041 #define __FUNCT__ "MatCreateAIJ"
4042 /*@C
4043    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4044    (the default parallel PETSc format).  For good matrix assembly performance
4045    the user should preallocate the matrix storage by setting the parameters
4046    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4047    performance can be increased by more than a factor of 50.
4048 
4049    Collective on MPI_Comm
4050 
4051    Input Parameters:
4052 +  comm - MPI communicator
4053 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4054            This value should be the same as the local size used in creating the
4055            y vector for the matrix-vector product y = Ax.
4056 .  n - This value should be the same as the local size used in creating the
4057        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4058        calculated if N is given). For square matrices n is almost always m.
4059 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4060 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4061 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4062            (same value is used for all local rows)
4063 .  d_nnz - array containing the number of nonzeros in the various rows of the
4064            DIAGONAL portion of the local submatrix (possibly different for each row)
4065            or NULL, if d_nz is used to specify the nonzero structure.
4066            The size of this array is equal to the number of local rows, i.e 'm'.
4067 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4068            submatrix (same value is used for all local rows).
4069 -  o_nnz - array containing the number of nonzeros in the various rows of the
4070            OFF-DIAGONAL portion of the local submatrix (possibly different for
4071            each row) or NULL, if o_nz is used to specify the nonzero
4072            structure. The size of this array is equal to the number
4073            of local rows, i.e 'm'.
4074 
4075    Output Parameter:
4076 .  A - the matrix
4077 
4078    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4079    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4080    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4081 
4082    Notes:
4083    If the *_nnz parameter is given then the *_nz parameter is ignored
4084 
4085    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4086    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4087    storage requirements for this matrix.
4088 
4089    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4090    processor then it must be used on all processors that share the object for
4091    that argument.
4092 
4093    The user MUST specify either the local or global matrix dimensions
4094    (possibly both).
4095 
4096    The parallel matrix is partitioned across processors such that the
4097    first m0 rows belong to process 0, the next m1 rows belong to
4098    process 1, the next m2 rows belong to process 2, etc., where
4099    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4100    values corresponding to an [m x N] submatrix.
4101 
4102    The columns are logically partitioned with the n0 columns belonging
4103    to the 0th partition, the next n1 columns belonging to the next
4104    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4105 
4106    The DIAGONAL portion of the local submatrix on any given processor
4107    is the submatrix formed by the m rows and the corresponding n columns
4108    owned by the given processor, i.e., the diagonal submatrix on
4109    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4110    etc. The remaining portion of the local submatrix [m x (N-n)]
4111    constitutes the OFF-DIAGONAL portion. The example below better
4112    illustrates this concept.
4113 
4114    For a square global matrix we define each processor's diagonal portion
4115    to be its local rows and the corresponding columns (a square submatrix);
4116    each processor's off-diagonal portion encompasses the remainder of the
4117    local matrix (a rectangular submatrix).
4118 
4119    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4120 
4121    When calling this routine with a single process communicator, a matrix of
4122    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4123    type of communicator, use the construction mechanism:
4124      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4125 
4126    By default, this format uses inodes (identical nodes) when possible.
4127    We search for consecutive rows with the same nonzero structure, thereby
4128    reusing matrix information to achieve increased efficiency.
4129 
4130    Options Database Keys:
4131 +  -mat_no_inode  - Do not use inodes
4132 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4133 -  -mat_aij_oneindex - Internally use indexing starting at 1
4134         rather than 0.  Note that when calling MatSetValues(),
4135         the user still MUST index entries starting at 0!
4136 
4137 
4138    Example usage:
4139 
4140    Consider the following 8x8 matrix with 34 nonzero values that is
4141    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4142    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4143    as follows:
4144 
4145 .vb
4146             1  2  0  |  0  3  0  |  0  4
4147     Proc0   0  5  6  |  7  0  0  |  8  0
4148             9  0 10  | 11  0  0  | 12  0
4149     -------------------------------------
4150            13  0 14  | 15 16 17  |  0  0
4151     Proc1   0 18  0  | 19 20 21  |  0  0
4152             0  0  0  | 22 23  0  | 24  0
4153     -------------------------------------
4154     Proc2  25 26 27  |  0  0 28  | 29  0
4155            30  0  0  | 31 32 33  |  0 34
4156 .ve
4157 
4158    This can be represented as a collection of submatrices as:
4159 
4160 .vb
4161       A B C
4162       D E F
4163       G H I
4164 .ve
4165 
4166    Where the submatrices A,B,C are owned by proc0, D,E,F are
4167    owned by proc1, G,H,I are owned by proc2.
4168 
4169    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4170    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4171    The 'M','N' parameters are 8,8, and have the same values on all procs.
4172 
4173    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4174    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4175    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4176    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4177    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4178    matrix and [DF] as another SeqAIJ matrix.
4179 
4180    When d_nz, o_nz parameters are specified, d_nz storage elements are
4181    allocated for every row of the local diagonal submatrix, and o_nz
4182    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4183    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4184    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4185    In this case, the values of d_nz,o_nz are:
4186 .vb
4187      proc0 : d_nz = 2, o_nz = 2
4188      proc1 : d_nz = 3, o_nz = 2
4189      proc2 : d_nz = 1, o_nz = 4
4190 .ve
4191    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4192    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4193    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4194    34 values.
4195 
4196    When d_nnz, o_nnz parameters are specified, the storage is specified
4197    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4198    In the above case the values for d_nnz,o_nnz are:
4199 .vb
4200      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4201      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4202      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4203 .ve
4204    Here the space allocated is the sum of all the above values, i.e., 34, and
4205    hence the preallocation is perfect.
4206 
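   As a minimal sketch, proc1 in the example above could create the matrix with exact per-row
   preallocation as follows (the other processes make the analogous call with their own local sizes
   and arrays; the communicator and variable names are illustrative):
.vb
      Mat      A;
      PetscInt d_nnz[] = {3,3,2};
      PetscInt o_nnz[] = {2,1,1};

      ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
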
4207    Level: intermediate
4208 
4209 .keywords: matrix, aij, compressed row, sparse, parallel
4210 
4211 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4212           MPIAIJ, MatCreateMPIAIJWithArrays()
4213 @*/
4214 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4215 {
4216   PetscErrorCode ierr;
4217   PetscMPIInt    size;
4218 
4219   PetscFunctionBegin;
4220   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4221   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4222   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4223   if (size > 1) {
4224     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4225     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4226   } else {
4227     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4228     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4229   }
4230   PetscFunctionReturn(0);
4231 }
4232 
4233 #undef __FUNCT__
4234 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4235 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4236 {
4237   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4238 
4239   PetscFunctionBegin;
4240   *Ad     = a->A;
4241   *Ao     = a->B;
4242   *colmap = a->garray;
4243   PetscFunctionReturn(0);
4244 }
4245 
4246 #undef __FUNCT__
4247 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4248 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4249 {
4250   PetscErrorCode ierr;
4251   PetscInt       i;
4252   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4253 
4254   PetscFunctionBegin;
4255   if (coloring->ctype == IS_COLORING_GLOBAL) {
4256     ISColoringValue *allcolors,*colors;
4257     ISColoring      ocoloring;
4258 
4259     /* set coloring for diagonal portion */
4260     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4261 
4262     /* set coloring for off-diagonal portion */
4263     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4264     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4265     for (i=0; i<a->B->cmap->n; i++) {
4266       colors[i] = allcolors[a->garray[i]];
4267     }
4268     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4269     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4270     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4271     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4272   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4273     ISColoringValue *colors;
4274     PetscInt        *larray;
4275     ISColoring      ocoloring;
4276 
4277     /* set coloring for diagonal portion */
4278     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4279     for (i=0; i<a->A->cmap->n; i++) {
4280       larray[i] = i + A->cmap->rstart;
4281     }
4282     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4283     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4284     for (i=0; i<a->A->cmap->n; i++) {
4285       colors[i] = coloring->colors[larray[i]];
4286     }
4287     ierr = PetscFree(larray);CHKERRQ(ierr);
4288     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4289     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4290     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4291 
4292     /* set coloring for off-diagonal portion */
4293     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4294     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4295     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4296     for (i=0; i<a->B->cmap->n; i++) {
4297       colors[i] = coloring->colors[larray[i]];
4298     }
4299     ierr = PetscFree(larray);CHKERRQ(ierr);
4300     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4301     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4302     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4303   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4304   PetscFunctionReturn(0);
4305 }
4306 
4307 #undef __FUNCT__
4308 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4309 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4310 {
4311   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4312   PetscErrorCode ierr;
4313 
4314   PetscFunctionBegin;
4315   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4316   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4317   PetscFunctionReturn(0);
4318 }
4319 
4320 #undef __FUNCT__
4321 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4322 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4323 {
4324   PetscErrorCode ierr;
4325   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4326   PetscInt       *indx;
4327 
4328   PetscFunctionBegin;
4329   /* This routine will ONLY return MPIAIJ type matrix */
4330   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4331   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4332   if (n == PETSC_DECIDE) {
4333     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4334   }
4335   /* Check sum(n) = N */
4336   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4337   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4338 
4339   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4340   rstart -= m;
4341 
4342   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4343   for (i=0; i<m; i++) {
4344     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4345     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4346     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4347   }
4348 
4349   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4350   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4351   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4352   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4353   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4354   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4355   PetscFunctionReturn(0);
4356 }
4357 
4358 #undef __FUNCT__
4359 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4360 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4361 {
4362   PetscErrorCode ierr;
4363   PetscInt       m,N,i,rstart,nnz,Ii;
4364   PetscInt       *indx;
4365   PetscScalar    *values;
4366 
4367   PetscFunctionBegin;
4368   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4369   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4370   for (i=0; i<m; i++) {
4371     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4372     Ii   = i + rstart;
4373     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4374     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4375   }
4376   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4377   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4378   PetscFunctionReturn(0);
4379 }
4380 
4381 #undef __FUNCT__
4382 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4383 /*@
4384       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4385                  matrices from each processor
4386 
4387     Collective on MPI_Comm
4388 
4389    Input Parameters:
4390 +    comm - the communicator the parallel matrix will live on
4391 .    inmat - the input sequential matrix (one per process)
4392 .    n - number of local columns (or PETSC_DECIDE)
4393 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4394 
4395    Output Parameter:
4396 .    outmat - the parallel matrix generated
4397 
4398     Level: advanced
4399 
4400    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4401 
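   A minimal usage sketch (Aseq denotes each process's already assembled sequential matrix; the
   names are illustrative):
.vb
      Mat C;

      ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve
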
4402 @*/
4403 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4404 {
4405   PetscErrorCode ierr;
4406   PetscMPIInt    size;
4407 
4408   PetscFunctionBegin;
4409   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4410   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4411   if (size == 1) {
4412     if (scall == MAT_INITIAL_MATRIX) {
4413       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4414     } else {
4415       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4416     }
4417   } else {
4418     if (scall == MAT_INITIAL_MATRIX) {
4419       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4420     }
4421     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4422   }
4423   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4424   PetscFunctionReturn(0);
4425 }
4426 
4427 #undef __FUNCT__
4428 #define __FUNCT__ "MatFileSplit"
4429 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4430 {
4431   PetscErrorCode    ierr;
4432   PetscMPIInt       rank;
4433   PetscInt          m,N,i,rstart,nnz;
4434   size_t            len;
4435   const PetscInt    *indx;
4436   PetscViewer       out;
4437   char              *name;
4438   Mat               B;
4439   const PetscScalar *values;
4440 
4441   PetscFunctionBegin;
4442   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4443   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4444   /* Should this be the type of the diagonal block of A? */
4445   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4446   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4447   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4448   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4449   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4450   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4451   for (i=0; i<m; i++) {
4452     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4453     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4454     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4455   }
4456   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4457   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4458 
4459   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4460   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4461   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4462   sprintf(name,"%s.%d",outfile,rank);
4463   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4464   ierr = PetscFree(name);CHKERRQ(ierr);
4465   ierr = MatView(B,out);CHKERRQ(ierr);
4466   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4467   ierr = MatDestroy(&B);CHKERRQ(ierr);
4468   PetscFunctionReturn(0);
4469 }
4470 
4471 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4472 #undef __FUNCT__
4473 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4474 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4475 {
4476   PetscErrorCode      ierr;
4477   Mat_Merge_SeqsToMPI *merge;
4478   PetscContainer      container;
4479 
4480   PetscFunctionBegin;
4481   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4482   if (container) {
4483     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4484     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4485     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4486     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4487     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4488     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4489     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4490     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4491     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4492     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4493     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4494     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4495     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4496     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4497     ierr = PetscFree(merge);CHKERRQ(ierr);
4498     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4499   }
4500   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4501   PetscFunctionReturn(0);
4502 }
4503 
4504 #include <../src/mat/utils/freespace.h>
4505 #include <petscbt.h>
4506 
4507 #undef __FUNCT__
4508 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4509 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4510 {
4511   PetscErrorCode      ierr;
4512   MPI_Comm            comm;
4513   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4514   PetscMPIInt         size,rank,taga,*len_s;
4515   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4516   PetscInt            proc,m;
4517   PetscInt            **buf_ri,**buf_rj;
4518   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4519   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4520   MPI_Request         *s_waits,*r_waits;
4521   MPI_Status          *status;
4522   MatScalar           *aa=a->a;
4523   MatScalar           **abuf_r,*ba_i;
4524   Mat_Merge_SeqsToMPI *merge;
4525   PetscContainer      container;
4526 
4527   PetscFunctionBegin;
4528   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4529   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4530 
4531   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4532   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4533 
4534   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4535   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4536 
4537   bi     = merge->bi;
4538   bj     = merge->bj;
4539   buf_ri = merge->buf_ri;
4540   buf_rj = merge->buf_rj;
4541 
4542   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4543   owners = merge->rowmap->range;
4544   len_s  = merge->len_s;
4545 
4546   /* send and recv matrix values */
4547   /*-----------------------------*/
4548   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4549   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4550 
4551   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4552   for (proc=0,k=0; proc<size; proc++) {
4553     if (!len_s[proc]) continue;
4554     i    = owners[proc];
4555     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4556     k++;
4557   }
4558 
4559   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4560   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4561   ierr = PetscFree(status);CHKERRQ(ierr);
4562 
4563   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4564   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4565 
4566   /* insert mat values of mpimat */
4567   /*----------------------------*/
4568   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4569   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4570 
4571   for (k=0; k<merge->nrecv; k++) {
4572     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4573     nrows       = *(buf_ri_k[k]);
4574     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
4575     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4576   }
4577 
4578   /* set values of ba */
4579   m = merge->rowmap->n;
4580   for (i=0; i<m; i++) {
4581     arow = owners[rank] + i;
4582     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4583     bnzi = bi[i+1] - bi[i];
4584     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4585 
4586     /* add local non-zero vals of this proc's seqmat into ba */
4587     anzi   = ai[arow+1] - ai[arow];
4588     aj     = a->j + ai[arow];
4589     aa     = a->a + ai[arow];
4590     nextaj = 0;
4591     for (j=0; nextaj<anzi; j++) {
4592       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4593         ba_i[j] += aa[nextaj++];
4594       }
4595     }
4596 
4597     /* add received vals into ba */
4598     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4599       /* i-th row */
4600       if (i == *nextrow[k]) {
4601         anzi   = *(nextai[k]+1) - *nextai[k];
4602         aj     = buf_rj[k] + *(nextai[k]);
4603         aa     = abuf_r[k] + *(nextai[k]);
4604         nextaj = 0;
4605         for (j=0; nextaj<anzi; j++) {
4606           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4607             ba_i[j] += aa[nextaj++];
4608           }
4609         }
4610         nextrow[k]++; nextai[k]++;
4611       }
4612     }
4613     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4614   }
4615   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4616   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4617 
4618   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4619   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4620   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4621   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4622   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4623   PetscFunctionReturn(0);
4624 }
4625 
4626 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4627 
4628 #undef __FUNCT__
4629 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4630 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4631 {
4632   PetscErrorCode      ierr;
4633   Mat                 B_mpi;
4634   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4635   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4636   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4637   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4638   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4639   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4640   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4641   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4642   MPI_Status          *status;
4643   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4644   PetscBT             lnkbt;
4645   Mat_Merge_SeqsToMPI *merge;
4646   PetscContainer      container;
4647 
4648   PetscFunctionBegin;
4649   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4650 
4651   /* make sure it is a PETSc comm */
4652   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4653   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4654   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4655 
4656   ierr = PetscNew(&merge);CHKERRQ(ierr);
4657   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4658 
4659   /* determine row ownership */
4660   /*---------------------------------------------------------*/
4661   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4662   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4663   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4664   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4665   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4666   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4667   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4668 
4669   m      = merge->rowmap->n;
4670   owners = merge->rowmap->range;
4671 
4672   /* determine the number of messages to send, their lengths */
4673   /*---------------------------------------------------------*/
4674   len_s = merge->len_s;
4675 
4676   len          = 0; /* length of buf_si[] */
4677   merge->nsend = 0;
4678   for (proc=0; proc<size; proc++) {
4679     len_si[proc] = 0;
4680     if (proc == rank) {
4681       len_s[proc] = 0;
4682     } else {
4683       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4684       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4685     }
4686     if (len_s[proc]) {
4687       merge->nsend++;
4688       nrows = 0;
4689       for (i=owners[proc]; i<owners[proc+1]; i++) {
4690         if (ai[i+1] > ai[i]) nrows++;
4691       }
4692       len_si[proc] = 2*(nrows+1);
4693       len         += len_si[proc];
4694     }
4695   }
4696 
4697   /* determine the number and length of messages to receive for ij-structure */
4698   /*-------------------------------------------------------------------------*/
4699   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4700   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4701 
4702   /* post the Irecv of j-structure */
4703   /*-------------------------------*/
4704   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4705   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4706 
4707   /* post the Isend of j-structure */
4708   /*--------------------------------*/
4709   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4710 
4711   for (proc=0, k=0; proc<size; proc++) {
4712     if (!len_s[proc]) continue;
4713     i    = owners[proc];
4714     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4715     k++;
4716   }
4717 
4718   /* receives and sends of j-structure are complete */
4719   /*------------------------------------------------*/
4720   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4721   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4722 
4723   /* send and recv i-structure */
4724   /*---------------------------*/
4725   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4726   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4727 
4728   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4729   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4730   for (proc=0,k=0; proc<size; proc++) {
4731     if (!len_s[proc]) continue;
4732     /* form outgoing message for i-structure:
4733          buf_si[0]:                 nrows to be sent
4734                [1:nrows]:           row index (global)
4735                [nrows+1:2*nrows+1]: i-structure index
4736     */
4737     /*-------------------------------------------*/
4738     nrows       = len_si[proc]/2 - 1;
4739     buf_si_i    = buf_si + nrows+1;
4740     buf_si[0]   = nrows;
4741     buf_si_i[0] = 0;
4742     nrows       = 0;
4743     for (i=owners[proc]; i<owners[proc+1]; i++) {
4744       anzi = ai[i+1] - ai[i];
4745       if (anzi) {
4746         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4747         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4748         nrows++;
4749       }
4750     }
4751     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4752     k++;
4753     buf_si += len_si[proc];
4754   }
4755 
4756   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4757   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4758 
4759   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4760   for (i=0; i<merge->nrecv; i++) {
4761     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4762   }
4763 
4764   ierr = PetscFree(len_si);CHKERRQ(ierr);
4765   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4766   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4767   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4768   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4769   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4770   ierr = PetscFree(status);CHKERRQ(ierr);
4771 
4772   /* compute a local seq matrix in each processor */
4773   /*----------------------------------------------*/
4774   /* allocate bi array and free space for accumulating nonzero column info */
4775   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4776   bi[0] = 0;
4777 
4778   /* create and initialize a linked list */
4779   nlnk = N+1;
4780   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4781 
4782   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4783   len  = ai[owners[rank+1]] - ai[owners[rank]];
4784   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4785 
4786   current_space = free_space;
4787 
4788   /* determine symbolic info for each local row */
4789   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4790 
4791   for (k=0; k<merge->nrecv; k++) {
4792     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4793     nrows       = *buf_ri_k[k];
4794     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
4795     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4796   }
4797 
4798   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4799   len  = 0;
4800   for (i=0; i<m; i++) {
4801     bnzi = 0;
4802     /* add local non-zero cols of this proc's seqmat into lnk */
4803     arow  = owners[rank] + i;
4804     anzi  = ai[arow+1] - ai[arow];
4805     aj    = a->j + ai[arow];
4806     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4807     bnzi += nlnk;
4808     /* add received col data into lnk */
4809     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4810       if (i == *nextrow[k]) { /* i-th row */
4811         anzi  = *(nextai[k]+1) - *nextai[k];
4812         aj    = buf_rj[k] + *nextai[k];
4813         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4814         bnzi += nlnk;
4815         nextrow[k]++; nextai[k]++;
4816       }
4817     }
4818     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4819 
4820     /* if free space is not available, make more free space */
4821     if (current_space->local_remaining<bnzi) {
4822       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4823       nspacedouble++;
4824     }
4825     /* copy data into free space, then initialize lnk */
4826     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4827     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4828 
4829     current_space->array           += bnzi;
4830     current_space->local_used      += bnzi;
4831     current_space->local_remaining -= bnzi;
4832 
4833     bi[i+1] = bi[i] + bnzi;
4834   }
4835 
4836   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4837 
4838   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4839   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4840   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4841 
4842   /* create symbolic parallel matrix B_mpi */
4843   /*---------------------------------------*/
4844   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4845   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4846   if (n==PETSC_DECIDE) {
4847     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4848   } else {
4849     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4850   }
4851   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4852   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4853   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4854   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4855   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4856 
4857   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4858   B_mpi->assembled    = PETSC_FALSE;
4859   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4860   merge->bi           = bi;
4861   merge->bj           = bj;
4862   merge->buf_ri       = buf_ri;
4863   merge->buf_rj       = buf_rj;
4864   merge->coi          = NULL;
4865   merge->coj          = NULL;
4866   merge->owners_co    = NULL;
4867 
4868   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4869 
4870   /* attach the supporting struct to B_mpi for reuse */
4871   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4872   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4873   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4874   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4875   *mpimat = B_mpi;
4876 
4877   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4878   PetscFunctionReturn(0);
4879 }
4880 
4881 #undef __FUNCT__
4882 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4883 /*@C
4884       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4885                  matrices from each processor
4886 
4887     Collective on MPI_Comm
4888 
4889    Input Parameters:
4890 +    comm - the communicator the parallel matrix will live on
4891 .    seqmat - the input sequential matrix (one per process)
4892 .    m - number of local rows (or PETSC_DECIDE)
4893 .    n - number of local columns (or PETSC_DECIDE)
4894 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4895 
4896    Output Parameter:
4897 .    mpimat - the parallel matrix generated
4898 
4899     Level: advanced
4900 
4901    Notes:
4902      The dimensions of the sequential matrix in each processor MUST be the same.
4903      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI" and will be
4904      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
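
     A minimal usage sketch (Aseq denotes each process's sequential matrix; the names are illustrative):
.vb
      Mat C;

      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      /* ... change the numerical values of Aseq, keeping its nonzero pattern ... */
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
.ve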
4905 @*/
4906 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4907 {
4908   PetscErrorCode ierr;
4909   PetscMPIInt    size;
4910 
4911   PetscFunctionBegin;
4912   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4913   if (size == 1) {
4914     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4915     if (scall == MAT_INITIAL_MATRIX) {
4916       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4917     } else {
4918       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4919     }
4920     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4921     PetscFunctionReturn(0);
4922   }
4923   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4924   if (scall == MAT_INITIAL_MATRIX) {
4925     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4926   }
4927   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4928   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4929   PetscFunctionReturn(0);
4930 }
4931 
4932 #undef __FUNCT__
4933 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4934 /*@
4935      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4936           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4937           with MatGetSize()
4938 
4939     Not Collective
4940 
4941    Input Parameters:
4942 +    A - the matrix
4943 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4944 
4945    Output Parameter:
4946 .    A_loc - the local sequential matrix generated
4947 
4948     Level: developer
4949 
4950 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4951 
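   A minimal usage sketch (the variable names are illustrative):
.vb
      Mat Aloc;

      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
      /* ... use Aloc ... */
      ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve
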
4952 @*/
4953 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4954 {
4955   PetscErrorCode ierr;
4956   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4957   Mat_SeqAIJ     *mat,*a,*b;
4958   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4959   MatScalar      *aa,*ba,*cam;
4960   PetscScalar    *ca;
4961   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4962   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4963   PetscBool      match;
4964 
4965   PetscFunctionBegin;
4966   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4967   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4968   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4969   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4970   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4971   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4972   aa = a->a; ba = b->a;
4973   if (scall == MAT_INITIAL_MATRIX) {
4974     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4975     ci[0] = 0;
4976     for (i=0; i<am; i++) {
4977       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4978     }
4979     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4980     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4981     k    = 0;
4982     for (i=0; i<am; i++) {
4983       ncols_o = bi[i+1] - bi[i];
4984       ncols_d = ai[i+1] - ai[i];
4985       /* off-diagonal portion of A */
4986       for (jo=0; jo<ncols_o; jo++) {
4987         col = cmap[*bj];
4988         if (col >= cstart) break;
4989         cj[k]   = col; bj++;
4990         ca[k++] = *ba++;
4991       }
4992       /* diagonal portion of A */
4993       for (j=0; j<ncols_d; j++) {
4994         cj[k]   = cstart + *aj++;
4995         ca[k++] = *aa++;
4996       }
4997       /* off-diagonal portion of A */
4998       for (j=jo; j<ncols_o; j++) {
4999         cj[k]   = cmap[*bj++];
5000         ca[k++] = *ba++;
5001       }
5002     }
5003     /* put together the new matrix */
5004     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5005     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5006     /* Since these are PETSc arrays, change flags to free them as necessary. */
5007     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5008     mat->free_a  = PETSC_TRUE;
5009     mat->free_ij = PETSC_TRUE;
5010     mat->nonew   = 0;
5011   } else if (scall == MAT_REUSE_MATRIX) {
5012     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5013     ci = mat->i; cj = mat->j; cam = mat->a;
5014     for (i=0; i<am; i++) {
5015       /* off-diagonal portion of A */
5016       ncols_o = bi[i+1] - bi[i];
5017       for (jo=0; jo<ncols_o; jo++) {
5018         col = cmap[*bj];
5019         if (col >= cstart) break;
5020         *cam++ = *ba++; bj++;
5021       }
5022       /* diagonal portion of A */
5023       ncols_d = ai[i+1] - ai[i];
5024       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5025       /* off-diagonal portion of A */
5026       for (j=jo; j<ncols_o; j++) {
5027         *cam++ = *ba++; bj++;
5028       }
5029     }
5030   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5031   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5032   PetscFunctionReturn(0);
5033 }
5034 
5035 #undef __FUNCT__
5036 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5037 /*@C
5038      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5039 
5040     Not Collective
5041 
5042    Input Parameters:
5043 +    A - the matrix
5044 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5045 -    row, col - index sets of rows and columns to extract (or NULL)
5046 
5047    Output Parameter:
5048 .    A_loc - the local sequential matrix generated
5049 
5050     Level: developer
5051 
5052 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5053 
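   A minimal usage sketch that keeps all local rows and all nonzero columns (the variable names
   are illustrative):
.vb
      Mat Aloc;

      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
      /* ... use Aloc ... */
      ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve
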
5054 @*/
5055 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5056 {
5057   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5058   PetscErrorCode ierr;
5059   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5060   IS             isrowa,iscola;
5061   Mat            *aloc;
5062   PetscBool      match;
5063 
5064   PetscFunctionBegin;
5065   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5066   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5067   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5068   if (!row) {
5069     start = A->rmap->rstart; end = A->rmap->rend;
5070     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5071   } else {
5072     isrowa = *row;
5073   }
5074   if (!col) {
5075     start = A->cmap->rstart;
5076     cmap  = a->garray;
5077     nzA   = a->A->cmap->n;
5078     nzB   = a->B->cmap->n;
5079     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5080     ncols = 0;
5081     for (i=0; i<nzB; i++) {
5082       if (cmap[i] < start) idx[ncols++] = cmap[i];
5083       else break;
5084     }
5085     imark = i;
5086     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5087     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5088     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5089   } else {
5090     iscola = *col;
5091   }
5092   if (scall != MAT_INITIAL_MATRIX) {
5093     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5094     aloc[0] = *A_loc;
5095   }
5096   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5097   *A_loc = aloc[0];
5098   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5099   if (!row) {
5100     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5101   }
5102   if (!col) {
5103     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5104   }
5105   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5106   PetscFunctionReturn(0);
5107 }
5108 
5109 #undef __FUNCT__
5110 #define __FUNCT__ "MatGetBrowsOfAcols"
5111 /*@C
5112     MatGetBrowsOfAcols - Creates a SeqAIJ matrix containing the rows of B that correspond to the nonzero columns of local A
5113 
5114     Collective on Mat
5115 
5116    Input Parameters:
5117 +    A,B - the matrices in mpiaij format
5118 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5119 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5120 
5121    Output Parameter:
5122 +    rowb, colb - index sets of rows and columns of B to extract
5123 -    B_seq - the sequential matrix generated
5124 
5125     Level: developer
5126 
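   A minimal usage sketch; the index sets created on the first call are passed back in for reuse
   (the variable names are illustrative):
.vb
      IS  rowb = NULL,colb = NULL;
      Mat Bseq;

      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
      /* ... later, after the values of B change but its nonzero pattern stays the same ... */
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
.ve
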
5127 @*/
5128 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5129 {
5130   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5131   PetscErrorCode ierr;
5132   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5133   IS             isrowb,iscolb;
5134   Mat            *bseq=NULL;
5135 
5136   PetscFunctionBegin;
5137   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5138     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5139   }
5140   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5141 
5142   if (scall == MAT_INITIAL_MATRIX) {
5143     start = A->cmap->rstart;
5144     cmap  = a->garray;
5145     nzA   = a->A->cmap->n;
5146     nzB   = a->B->cmap->n;
5147     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5148     ncols = 0;
5149     for (i=0; i<nzB; i++) {  /* row < local row index */
5150       if (cmap[i] < start) idx[ncols++] = cmap[i];
5151       else break;
5152     }
5153     imark = i;
5154     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5155     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5156     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5157     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5158   } else {
5159     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5160     isrowb  = *rowb; iscolb = *colb;
5161     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5162     bseq[0] = *B_seq;
5163   }
5164   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5165   *B_seq = bseq[0];
5166   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5167   if (!rowb) {
5168     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5169   } else {
5170     *rowb = isrowb;
5171   }
5172   if (!colb) {
5173     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5174   } else {
5175     *colb = iscolb;
5176   }
5177   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5178   PetscFunctionReturn(0);
5179 }
5180 
5181 #undef __FUNCT__
5182 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5183 /*
5184     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5185     of the OFF-DIAGONAL portion of the local A
5186 
5187     Collective on Mat
5188 
5189    Input Parameters:
5190 +    A,B - the matrices in mpiaij format
5191 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5192 
5193    Output Parameters:
5194 +    startsj_s - starting points in B's sending j-arrays, saved for MAT_REUSE_MATRIX (or NULL)
5195 .    startsj_r - starting points in B's receiving j-arrays, saved for MAT_REUSE_MATRIX (or NULL)
5196 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE_MATRIX (or NULL)
5197 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5198 
5199     Level: developer
5200 
5201 */
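/*
   Implementation outline: the matrix-vector scatter (a->Mvctx) of A already encodes which off-process rows
   of B this process needs; three rounds of nonblocking sends/receives then exchange, in order, the row
   lengths (i-array), the column indices (j-array), and the numerical values (a-array), from which the
   sequential matrix B_oth is assembled with MatCreateSeqAIJWithArrays().
*/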
5202 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5203 {
5204   VecScatter_MPI_General *gen_to,*gen_from;
5205   PetscErrorCode         ierr;
5206   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5207   Mat_SeqAIJ             *b_oth;
5208   VecScatter             ctx =a->Mvctx;
5209   MPI_Comm               comm;
5210   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5211   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5212   PetscScalar            *rvalues,*svalues;
5213   MatScalar              *b_otha,*bufa,*bufA;
5214   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5215   MPI_Request            *rwaits = NULL,*swaits = NULL;
5216   MPI_Status             *sstatus,rstatus;
5217   PetscMPIInt            jj;
5218   PetscInt               *cols,sbs,rbs;
5219   PetscScalar            *vals;
5220 
5221   PetscFunctionBegin;
5222   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5223   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5224     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5225   }
5226   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5227   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5228 
5229   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5230   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5231   rvalues  = gen_from->values; /* scatter buffer reused to hold the lengths of the rows to be received */
5232   svalues  = gen_to->values;   /* scatter buffer reused to hold the lengths of the rows to be sent */
5233   nrecvs   = gen_from->n;
5234   nsends   = gen_to->n;
5235 
5236   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5237   srow    = gen_to->indices;    /* local row index to be sent */
5238   sstarts = gen_to->starts;
5239   sprocs  = gen_to->procs;
5240   sstatus = gen_to->sstatus;
5241   sbs     = gen_to->bs;
5242   rstarts = gen_from->starts;
5243   rprocs  = gen_from->procs;
5244   rbs     = gen_from->bs;
5245 
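  /* without buffers supplied by the caller there is nothing to reuse, so force a full (initial) setup */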
5246   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5247   if (scall == MAT_INITIAL_MATRIX) {
5248     /* i-array */
5249     /*---------*/
5250     /*  post receives */
5251     for (i=0; i<nrecvs; i++) {
5252       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5253       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5254       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5255     }
5256 
5257     /* pack the outgoing message */
5258     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5259 
5260     sstartsj[0] = 0;
5261     rstartsj[0] = 0;
5262     len         = 0; /* total length of j or a array to be sent */
5263     k           = 0;
5264     for (i=0; i<nsends; i++) {
5265       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5266       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5267       for (j=0; j<nrows; j++) {
5268         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5269         for (l=0; l<sbs; l++) {
5270           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5271 
5272           rowlen[j*sbs+l] = ncols;
5273 
5274           len += ncols;
5275           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5276         }
5277         k++;
5278       }
5279       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5280 
5281       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5282     }
5283     /* recvs and sends of i-array are completed */
5284     i = nrecvs;
5285     while (i--) {
5286       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5287     }
5288     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5289 
5290     /* allocate buffers for sending j and a arrays */
5291     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5292     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5293 
5294     /* create i-array of B_oth */
5295     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5296 
5297     b_othi[0] = 0;
5298     len       = 0; /* total length of j or a array to be received */
5299     k         = 0;
5300     for (i=0; i<nrecvs; i++) {
5301       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5302       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5303       for (j=0; j<nrows; j++) {
5304         b_othi[k+1] = b_othi[k] + rowlen[j];
5305         len        += rowlen[j]; k++;
5306       }
5307       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5308     }
5309 
5310     /* allocate space for j and a arrays of B_oth */
5311     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5312     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5313 
5314     /* j-array */
5315     /*---------*/
5316     /*  post receives of j-array */
5317     for (i=0; i<nrecvs; i++) {
5318       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5319       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5320     }
5321 
5322     /* pack the outgoing message j-array */
5323     k = 0;
5324     for (i=0; i<nsends; i++) {
5325       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5326       bufJ  = bufj+sstartsj[i];
5327       for (j=0; j<nrows; j++) {
5328         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5329         for (ll=0; ll<sbs; ll++) {
5330           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5331           for (l=0; l<ncols; l++) {
5332             *bufJ++ = cols[l];
5333           }
5334           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5335         }
5336       }
5337       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5338     }
5339 
5340     /* recvs and sends of j-array are completed */
5341     i = nrecvs;
5342     while (i--) {
5343       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5344     }
5345     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5346   } else if (scall == MAT_REUSE_MATRIX) {
5347     sstartsj = *startsj_s;
5348     rstartsj = *startsj_r;
5349     bufa     = *bufa_ptr;
5350     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5351     b_otha   = b_oth->a;
5352   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse value, must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5353 
5354   /* a-array */
5355   /*---------*/
5356   /*  post receives of a-array */
5357   for (i=0; i<nrecvs; i++) {
5358     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5359     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5360   }
5361 
5362   /* pack the outgoing message a-array */
5363   k = 0;
5364   for (i=0; i<nsends; i++) {
5365     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5366     bufA  = bufa+sstartsj[i];
5367     for (j=0; j<nrows; j++) {
5368       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5369       for (ll=0; ll<sbs; ll++) {
5370         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5371         for (l=0; l<ncols; l++) {
5372           *bufA++ = vals[l];
5373         }
5374         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5375       }
5376     }
5377     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5378   }
5379   /* recvs and sends of a-array are completed */
5380   i = nrecvs;
5381   while (i--) {
5382     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5383   }
5384   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5385   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5386 
5387   if (scall == MAT_INITIAL_MATRIX) {
5388     /* put together the new matrix */
5389     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5390 
5391     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5392     /* Since these are PETSc arrays, change flags to free them as necessary. */
5393     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5394     b_oth->free_a  = PETSC_TRUE;
5395     b_oth->free_ij = PETSC_TRUE;
5396     b_oth->nonew   = 0;
5397 
5398     ierr = PetscFree(bufj);CHKERRQ(ierr);
5399     if (!startsj_s || !bufa_ptr) {
5400       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5401       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5402     } else {
5403       *startsj_s = sstartsj;
5404       *startsj_r = rstartsj;
5405       *bufa_ptr  = bufa;
5406     }
5407   }
5408   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5409   PetscFunctionReturn(0);
5410 }
5411 
5412 #undef __FUNCT__
5413 #define __FUNCT__ "MatGetCommunicationStructs"
5414 /*@C
5415   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5416 
5417   Not Collective
5418 
5419   Input Parameters:
5420 . A - The matrix in mpiaij format
5421 
5422   Output Parameters:
5423 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5424 . colmap - A map from global column index to local index into lvec
5425 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5426 
5427   Level: developer
5428 
5429 @*/
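/*
   A minimal usage sketch (illustrative only, for a build without PETSC_USE_CTABLE), assuming A is an
   assembled MATMPIAIJ matrix; the returned structures remain owned by the matrix and must not be destroyed:

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/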
5430 #if defined(PETSC_USE_CTABLE)
5431 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5432 #else
5433 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5434 #endif
5435 {
5436   Mat_MPIAIJ *a;
5437 
5438   PetscFunctionBegin;
5439   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5440   PetscValidPointer(lvec, 2);
5441   PetscValidPointer(colmap, 3);
5442   PetscValidPointer(multScatter, 4);
5443   a = (Mat_MPIAIJ*) A->data;
5444   if (lvec) *lvec = a->lvec;
5445   if (colmap) *colmap = a->colmap;
5446   if (multScatter) *multScatter = a->Mvctx;
5447   PetscFunctionReturn(0);
5448 }
5449 
5450 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5451 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5452 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5453 
5454 #undef __FUNCT__
5455 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5456 /*
5457     Computes C = A*B as (B'*A')' since computing the product A*B directly is untenable
5458 
5459                n                       p                          p
5460         (              )       (              )         (                  )
5461       m (      A       )  *  n (       B      )   =   m (         C        )
5462         (              )       (              )         (                  )
5463 
5464 */
5465 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5466 {
5467   PetscErrorCode ierr;
5468   Mat            At,Bt,Ct;
5469 
5470   PetscFunctionBegin;
5471   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5472   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5473   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5474   ierr = MatDestroy(&At);CHKERRQ(ierr);
5475   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5476   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5477   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5478   PetscFunctionReturn(0);
5479 }
5480 
5481 #undef __FUNCT__
5482 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5483 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5484 {
5485   PetscErrorCode ierr;
5486   PetscInt       m=A->rmap->n,n=B->cmap->n;
5487   Mat            Cmat;
5488 
5489   PetscFunctionBegin;
5490   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5491   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5492   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5493   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5494   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5495   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5496   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5497   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5498 
5499   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5500 
5501   *C = Cmat;
5502   PetscFunctionReturn(0);
5503 }
5504 
5505 /* ----------------------------------------------------------------*/
5506 #undef __FUNCT__
5507 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5508 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5509 {
5510   PetscErrorCode ierr;
5511 
5512   PetscFunctionBegin;
5513   if (scall == MAT_INITIAL_MATRIX) {
5514     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5515     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5516     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5517   }
5518   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5519   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5520   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5521   PetscFunctionReturn(0);
5522 }
5523 
5524 #if defined(PETSC_HAVE_MUMPS)
5525 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5526 #endif
5527 #if defined(PETSC_HAVE_PASTIX)
5528 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5529 #endif
5530 #if defined(PETSC_HAVE_SUPERLU_DIST)
5531 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5532 #endif
5533 #if defined(PETSC_HAVE_CLIQUE)
5534 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5535 #endif
5536 
5537 /*MC
5538    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5539 
5540    Options Database Keys:
5541 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5542 
5543   Level: beginner
5544 
5545 .seealso: MatCreateAIJ()
5546 M*/
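/*
   A minimal creation sketch (illustrative only; comm, the sizes, and the preallocation counts are
   placeholders). MatCreateAIJ() is the usual convenience routine, but the type can also be set explicitly:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
*/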
5547 
5548 #undef __FUNCT__
5549 #define __FUNCT__ "MatCreate_MPIAIJ"
5550 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5551 {
5552   Mat_MPIAIJ     *b;
5553   PetscErrorCode ierr;
5554   PetscMPIInt    size;
5555 
5556   PetscFunctionBegin;
5557   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5558 
5559   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5560   B->data       = (void*)b;
5561   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5562   B->assembled  = PETSC_FALSE;
5563   B->insertmode = NOT_SET_VALUES;
5564   b->size       = size;
5565 
5566   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5567 
5568   /* build cache for off array entries formed */
5569   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5570 
5571   b->donotstash  = PETSC_FALSE;
5572   b->colmap      = 0;
5573   b->garray      = 0;
5574   b->roworiented = PETSC_TRUE;
5575 
5576   /* stuff used for matrix vector multiply */
5577   b->lvec  = NULL;
5578   b->Mvctx = NULL;
5579 
5580   /* stuff for MatGetRow() */
5581   b->rowindices   = 0;
5582   b->rowvalues    = 0;
5583   b->getrowactive = PETSC_FALSE;
5584 
5585   /* flexible pointer used in CUSP/CUSPARSE classes */
5586   b->spptr = NULL;
5587 
5588 #if defined(PETSC_HAVE_MUMPS)
5589   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5590 #endif
5591 #if defined(PETSC_HAVE_PASTIX)
5592   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5593 #endif
5594 #if defined(PETSC_HAVE_SUPERLU_DIST)
5595   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5596 #endif
5597 #if defined(PETSC_HAVE_CLIQUE)
5598   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5599 #endif
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5602   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5604   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5605   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5606   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5607   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5608   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5609   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5610   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5613   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5614   PetscFunctionReturn(0);
5615 }
5616 
5617 #undef __FUNCT__
5618 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5619 /*@
5620      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5621          and "off-diagonal" part of the matrix in CSR format.
5622 
5623    Collective on MPI_Comm
5624 
5625    Input Parameters:
5626 +  comm - MPI communicator
5627 .  m - number of local rows (Cannot be PETSC_DECIDE)
5628 .  n - This value should be the same as the local size used in creating the
5629        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5630        calculated if N is given). For square matrices n is almost always m.
5631 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5632 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5633 .   i - row indices for "diagonal" portion of matrix
5634 .   j - column indices
5635 .   a - matrix values
5636 .   oi - row indices for "off-diagonal" portion of matrix
5637 .   oj - column indices
5638 -   oa - matrix values
5639 
5640    Output Parameter:
5641 .   mat - the matrix
5642 
5643    Level: advanced
5644 
5645    Notes:
5646        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5647        must free the arrays once the matrix has been destroyed and not before.
5648 
5649        The i and j indices are 0 based
5650 
5651        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5652 
5653        This sets local rows and cannot be used to set off-processor values.
5654 
5655        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5656        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5657        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5658        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5659        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5660        communication if it is known that only local entries will be set.
5661 
5662 .keywords: matrix, aij, compressed row, sparse, parallel
5663 
5664 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5665           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5666 @*/
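/*
   A minimal calling sketch (illustrative only; the CSR arrays i,j,a and oi,oj,oa are assumed to have been
   built by the caller in the split diagonal/off-diagonal format described above):

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,M,N,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ... use A; the arrays must remain valid until after A has been destroyed ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/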
5667 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5668 {
5669   PetscErrorCode ierr;
5670   Mat_MPIAIJ     *maij;
5671 
5672   PetscFunctionBegin;
5673   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5674   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5675   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5676   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5677   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5678   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5679   maij = (Mat_MPIAIJ*) (*mat)->data;
5680 
5681   (*mat)->preallocated = PETSC_TRUE;
5682 
5683   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5684   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5685 
5686   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5687   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5688 
5689   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5690   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5691   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5692   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5693 
5694   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5695   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5696   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5697   PetscFunctionReturn(0);
5698 }
5699 
5700 /*
5701     Special version for direct calls from Fortran
5702 */
5703 #include <petsc-private/fortranimpl.h>
5704 
5705 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5706 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5707 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5708 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5709 #endif
5710 
5711 /* Change these macros so can be used in void function */
5712 #undef CHKERRQ
5713 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5714 #undef SETERRQ2
5715 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5716 #undef SETERRQ3
5717 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5718 #undef SETERRQ
5719 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5720 
5721 #undef __FUNCT__
5722 #define __FUNCT__ "matsetvaluesmpiaij_"
5723 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5724 {
5725   Mat            mat  = *mmat;
5726   PetscInt       m    = *mm, n = *mn;
5727   InsertMode     addv = *maddv;
5728   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5729   PetscScalar    value;
5730   PetscErrorCode ierr;
5731 
5732   MatCheckPreallocated(mat,1);
5733   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5734 
5735 #if defined(PETSC_USE_DEBUG)
5736   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5737 #endif
5738   {
5739     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5740     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5741     PetscBool roworiented = aij->roworiented;
5742 
5743     /* Some Variables required in the macro */
5744     Mat        A                 = aij->A;
5745     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5746     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5747     MatScalar  *aa               = a->a;
5748     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5749     Mat        B                 = aij->B;
5750     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5751     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5752     MatScalar  *ba               = b->a;
5753 
5754     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5755     PetscInt  nonew = a->nonew;
5756     MatScalar *ap1,*ap2;
5757 
5758     PetscFunctionBegin;
5759     for (i=0; i<m; i++) {
5760       if (im[i] < 0) continue;
5761 #if defined(PETSC_USE_DEBUG)
5762       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5763 #endif
5764       if (im[i] >= rstart && im[i] < rend) {
5765         row      = im[i] - rstart;
5766         lastcol1 = -1;
5767         rp1      = aj + ai[row];
5768         ap1      = aa + ai[row];
5769         rmax1    = aimax[row];
5770         nrow1    = ailen[row];
5771         low1     = 0;
5772         high1    = nrow1;
5773         lastcol2 = -1;
5774         rp2      = bj + bi[row];
5775         ap2      = ba + bi[row];
5776         rmax2    = bimax[row];
5777         nrow2    = bilen[row];
5778         low2     = 0;
5779         high2    = nrow2;
5780 
5781         for (j=0; j<n; j++) {
5782           if (roworiented) value = v[i*n+j];
5783           else value = v[i+j*m];
5784           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5785           if (in[j] >= cstart && in[j] < cend) {
5786             col = in[j] - cstart;
5787             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5788           } else if (in[j] < 0) continue;
5789 #if defined(PETSC_USE_DEBUG)
5790           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5791 #endif
5792           else {
5793             if (mat->was_assembled) {
5794               if (!aij->colmap) {
5795                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5796               }
5797 #if defined(PETSC_USE_CTABLE)
5798               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5799               col--;
5800 #else
5801               col = aij->colmap[in[j]] - 1;
5802 #endif
5803               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5804                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5805                 col  =  in[j];
5806                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5807                 B     = aij->B;
5808                 b     = (Mat_SeqAIJ*)B->data;
5809                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5810                 rp2   = bj + bi[row];
5811                 ap2   = ba + bi[row];
5812                 rmax2 = bimax[row];
5813                 nrow2 = bilen[row];
5814                 low2  = 0;
5815                 high2 = nrow2;
5816                 bm    = aij->B->rmap->n;
5817                 ba    = b->a;
5818               }
5819             } else col = in[j];
5820             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5821           }
5822         }
5823       } else if (!aij->donotstash) {
5824         if (roworiented) {
5825           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5826         } else {
5827           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5828         }
5829       }
5830     }
5831   }
5832   PetscFunctionReturnVoid();
5833 }
5834 
5835