
#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

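   Example (a minimal sketch; the sizes and preallocation counts are illustrative, not required values):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
   MatSetType(A,MATAIJ);
   /* call both preallocation routines; the one matching the communicator size takes effect */
   MatSeqAIJSetPreallocation(A,5,NULL);
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve
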
  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

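   Example (a sketch, assuming the type is selected at runtime):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
   MatSetFromOptions(A);                  /* picks up -mat_type aijcrl */
   MatSeqAIJSetPreallocation(A,5,NULL);
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve
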
  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
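
/*
   Usage sketch (illustrative): this kernel is normally reached through the public
   interface MatFindNonzeroRows(); a NULL result means every row has a nonzero.

     IS keptrows;
     MatFindNonzeroRows(A,&keptrows);
     if (keptrows) ... operate on the rows that were kept ...
*/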

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
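
/*
   Usage sketch (illustrative), via the public interface MatGetColumnNorms();
   norms must have room for one entry per global column:

     PetscReal *norms;
     PetscInt  N;
     MatGetSize(A,NULL,&N);
     PetscMalloc1(N,&norms);
     MatGetColumnNorms(A,NORM_2,norms);
*/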

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
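
/*
   Usage sketch (illustrative; this is an internal routine, so the calling pattern
   below is an assumption): gseq is a sequential AIJ matrix whose values are taken
   from process 0, and mloc is the number of rows this process is to own.

     Mat Adist;
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mloc,MAT_INITIAL_MATRIX,&Adist);
     ... change numerical values of gseq on process 0 ...
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mloc,MAT_REUSE_MATRIX,&Adist);
*/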

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each process
has an order-N integer array) but is fast to access.
*/
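/*
   Worked example (illustrative): if this process couples to off-process global
   columns garray[] = {3,7,12}, the array version stores colmap[3] = 1,
   colmap[7] = 2, colmap[12] = 3 (that is, colmap[garray[i]] = i+1), while the
   PETSC_USE_CTABLE version maps key garray[i]+1 to i+1. Entries are kept
   1-based so that a lookup result of 0 means "column not present".
*/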
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) ap2[_i] += value;         \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
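
/*
   Illustrative note: v[] must hold the entire row in global column order, i.e.

     v = [ off-diagonal entries left of this process's diagonal block |
           entries of the diagonal block | remaining off-diagonal entries ]

   which is the order the three PetscMemcpy() calls above consume.
*/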

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
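
/*
   Usage sketch (illustrative): only locally owned rows may be queried, and the
   matrix must already be assembled.

     PetscInt    row,col = 0,rstart;
     PetscScalar val;
     MatGetOwnershipRange(mat,&rstart,NULL);
     row = rstart;
     MatGetValues(mat,1,&row,1,&col,&val);
*/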

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt      *owners = A->rmap->range;
  PetscInt       n      = A->rmap->n;
  PetscMPIInt    size   = mat->size;
  PetscSF        sf;
  PetscInt      *lrows;
  PetscSFNode   *rrows;
  PetscInt       lastidx = -1, r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
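
/*
   Usage sketch (illustrative), via the public MatZeroRows(): zero two global
   rows, place 1.0 on their diagonals, and adjust the right-hand side b using
   the values of the solution vector x:

     PetscInt rows[] = {0,5};
     MatZeroRows(A,2,rows,1.0,x,b);
*/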

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this no-op */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
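
/*
   Usage sketch (illustrative), via the public MatIsTranspose(); flg is set to
   PETSC_TRUE if B equals the transpose of A up to the given tolerance:

     PetscBool flg;
     MatIsTranspose(A,B,1.e-10,&flg);
*/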

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
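
/*
   Usage sketch (illustrative), via the public MatGetDiagonal(); v must be
   compatible with the row layout of A:

     Vec d;
     MatGetVecs(A,NULL,&d);
     MatGetDiagonal(A,d);
*/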
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatScale_MPIAIJ"
1102 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1109   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatDestroy_Redundant"
1115 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1116 {
1117   PetscErrorCode ierr;
1118   Mat_Redundant  *redund = *redundant;
1119   PetscInt       i;
1120 
1121   PetscFunctionBegin;
1122   *redundant = NULL;
1123   if (redund){
1124     if (redund->matseq) { /* via MatGetSubMatrices()  */
1125       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
1126       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
1127       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
1128       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
1129     } else {
1130       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
1131       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1132       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1133       for (i=0; i<redund->nrecvs; i++) {
1134         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1135         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1136       }
1137       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1138     }
1139 
1140     if (redund->psubcomm) {
1141       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
1142     }
1143     ierr = PetscFree(redund);CHKERRQ(ierr);
1144   }
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatDestroy_MPIAIJ"
1150 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1151 {
1152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1153   PetscErrorCode ierr;
1154 
1155   PetscFunctionBegin;
1156 #if defined(PETSC_USE_LOG)
1157   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1158 #endif
1159   ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
1160   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1161   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1162   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1163   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1164 #if defined(PETSC_USE_CTABLE)
1165   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1166 #else
1167   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1168 #endif
1169   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1170   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1171   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1172   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1173   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1174   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1175 
1176   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1190 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1191 {
1192   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1193   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1194   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1195   PetscErrorCode ierr;
1196   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1197   int            fd;
1198   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1199   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1200   PetscScalar    *column_values;
1201   PetscInt       message_count,flowcontrolcount;
1202   FILE           *file;
1203 
1204   PetscFunctionBegin;
1205   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1206   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1207   nz   = A->nz + B->nz;
1208   if (!rank) {
1209     header[0] = MAT_FILE_CLASSID;
1210     header[1] = mat->rmap->N;
1211     header[2] = mat->cmap->N;
1212 
1213     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1214     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1215     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1216     /* get largest number of rows any processor has */
1217     rlen  = mat->rmap->n;
1218     range = mat->rmap->range;
1219     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1220   } else {
1221     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1222     rlen = mat->rmap->n;
1223   }
1224 
1225   /* load up the local row counts */
1226   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1227   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1228 
1229   /* store the row lengths to the file */
1230   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1231   if (!rank) {
1232     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1233     for (i=1; i<size; i++) {
1234       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1235       rlen = range[i+1] - range[i];
1236       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1237       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1238     }
1239     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1240   } else {
1241     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1242     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1243     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1244   }
1245   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1246 
1247   /* load up the local column indices */
1248   nzmax = nz; /* th processor needs space a largest processor needs */
1249   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1250   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1251   cnt   = 0;
1252   for (i=0; i<mat->rmap->n; i++) {
1253     for (j=B->i[i]; j<B->i[i+1]; j++) {
1254       if ((col = garray[B->j[j]]) > cstart) break;
1255       column_indices[cnt++] = col;
1256     }
1257     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1258     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1259   }
1260   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1261 
1262   /* store the column indices to the file */
1263   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1264   if (!rank) {
1265     MPI_Status status;
1266     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1267     for (i=1; i<size; i++) {
1268       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1269       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1270       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1271       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1272       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1273     }
1274     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1275   } else {
1276     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1277     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1278     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1279     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1280   }
1281   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1282 
1283   /* load up the local column values */
1284   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1285   cnt  = 0;
1286   for (i=0; i<mat->rmap->n; i++) {
1287     for (j=B->i[i]; j<B->i[i+1]; j++) {
1288       if (garray[B->j[j]] > cstart) break;
1289       column_values[cnt++] = B->a[j];
1290     }
1291     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1292     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1293   }
1294   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1295 
1296   /* store the column values to the file */
1297   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1298   if (!rank) {
1299     MPI_Status status;
1300     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1301     for (i=1; i<size; i++) {
1302       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1303       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1304       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1305       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1306       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1307     }
1308     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1309   } else {
1310     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1311     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1312     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1313     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1314   }
1315   ierr = PetscFree(column_values);CHKERRQ(ierr);
1316 
1317   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1318   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1319   PetscFunctionReturn(0);
1320 }
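
/*
   Illustrative usage only (a minimal sketch, not part of this file): how the binary
   format written above is typically produced and read back. Assumes an assembled
   parallel matrix A and the hypothetical file name "matrix.dat"; the usual
   ierr/CHKERRQ error checking is elided.

      PetscViewer viewer;
      Mat         B;

      PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);
      MatView(A,viewer);                        collective; reaches MatView_MPIAIJ_Binary()
      PetscViewerDestroy(&viewer);

      PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
      MatCreate(PETSC_COMM_WORLD,&B);
      MatSetType(B,MATAIJ);
      MatLoad(B,viewer);
      PetscViewerDestroy(&viewer);
*/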
1321 
1322 #include <petscdraw.h>
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1325 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1326 {
1327   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1328   PetscErrorCode    ierr;
1329   PetscMPIInt       rank = aij->rank,size = aij->size;
1330   PetscBool         isdraw,iascii,isbinary;
1331   PetscViewer       sviewer;
1332   PetscViewerFormat format;
1333 
1334   PetscFunctionBegin;
1335   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1336   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1337   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1338   if (iascii) {
1339     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1340     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1341       MatInfo   info;
1342       PetscInt  *inodes;
1343 
1344       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1345       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1346       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1347       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1348       if (!inodes) {
1349         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1350                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1351       } else {
1352         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1353                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1354       }
1355       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1356       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1357       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1358       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1359       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1360       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1361       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1362       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1363       PetscFunctionReturn(0);
1364     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1365       PetscInt inodecount,inodelimit,*inodes;
1366       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1367       if (inodes) {
1368         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1369       } else {
1370         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1371       }
1372       PetscFunctionReturn(0);
1373     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1374       PetscFunctionReturn(0);
1375     }
1376   } else if (isbinary) {
1377     if (size == 1) {
1378       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1379       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1380     } else {
1381       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1382     }
1383     PetscFunctionReturn(0);
1384   } else if (isdraw) {
1385     PetscDraw draw;
1386     PetscBool isnull;
1387     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1388     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1389   }
1390 
1391   {
1392     /* assemble the entire matrix onto first processor. */
1393     Mat        A;
1394     Mat_SeqAIJ *Aloc;
1395     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1396     MatScalar  *a;
1397 
1398     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1399     if (!rank) {
1400       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1401     } else {
1402       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1403     }
1404     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1405     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1406     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1407     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1408     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1409 
1410     /* copy over the A part */
1411     Aloc = (Mat_SeqAIJ*)aij->A->data;
1412     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1413     row  = mat->rmap->rstart;
1414     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1415     for (i=0; i<m; i++) {
1416       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1417       row++;
1418       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1419     }
1420     aj = Aloc->j;
1421     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1422 
1423     /* copy over the B part */
1424     Aloc = (Mat_SeqAIJ*)aij->B->data;
1425     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1426     row  = mat->rmap->rstart;
1427     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1428     ct   = cols;
1429     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1430     for (i=0; i<m; i++) {
1431       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1432       row++;
1433       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1434     }
1435     ierr = PetscFree(ct);CHKERRQ(ierr);
1436     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1437     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1438     /*
1439        Every process must participate in this call since the graphics waits are
1440        synchronized across all processes that share the PetscDraw object
1441     */
1442     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1443     if (!rank) {
1444       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1445     }
1446     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1447     ierr = MatDestroy(&A);CHKERRQ(ierr);
1448   }
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatView_MPIAIJ"
1454 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1455 {
1456   PetscErrorCode ierr;
1457   PetscBool      iascii,isdraw,issocket,isbinary;
1458 
1459   PetscFunctionBegin;
1460   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1461   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1462   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1463   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1464   if (iascii || isdraw || isbinary || issocket) {
1465     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1466   }
1467   PetscFunctionReturn(0);
1468 }
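
/*
   Illustrative usage only (a minimal sketch): driving the viewer dispatch above from
   user code. Assumes an assembled MPIAIJ matrix A; error checking elided.

      PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);
      MatView(A,PETSC_VIEWER_STDOUT_WORLD);     takes the PETSC_VIEWER_ASCII_INFO branch

   The same paths are reachable from the options database, e.g. (depending on the
   PETSc version) -mat_view ::ascii_info or -mat_view draw.
*/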
1469 
1470 #undef __FUNCT__
1471 #define __FUNCT__ "MatSOR_MPIAIJ"
1472 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1473 {
1474   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1475   PetscErrorCode ierr;
1476   Vec            bb1 = 0;
1477   PetscBool      hasop;
1478 
1479   PetscFunctionBegin;
1480   if (flag == SOR_APPLY_UPPER) {
1481     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1482     PetscFunctionReturn(0);
1483   }
1484 
1485   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1486     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1487   }
1488 
1489   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1490     if (flag & SOR_ZERO_INITIAL_GUESS) {
1491       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1492       its--;
1493     }
1494 
1495     while (its--) {
1496       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1497       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1498 
1499       /* update rhs: bb1 = bb - B*x */
1500       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1501       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1502 
1503       /* local sweep */
1504       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1505     }
1506   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1507     if (flag & SOR_ZERO_INITIAL_GUESS) {
1508       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1509       its--;
1510     }
1511     while (its--) {
1512       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1513       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1514 
1515       /* update rhs: bb1 = bb - B*x */
1516       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1517       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1518 
1519       /* local sweep */
1520       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1521     }
1522   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1523     if (flag & SOR_ZERO_INITIAL_GUESS) {
1524       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1525       its--;
1526     }
1527     while (its--) {
1528       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1529       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1530 
1531       /* update rhs: bb1 = bb - B*x */
1532       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1533       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1534 
1535       /* local sweep */
1536       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1537     }
1538   } else if (flag & SOR_EISENSTAT) {
1539     Vec xx1;
1540 
1541     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1542     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1543 
1544     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1545     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1546     if (!mat->diag) {
1547       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1548       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1549     }
1550     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1551     if (hasop) {
1552       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1553     } else {
1554       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1555     }
1556     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1557 
1558     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1559 
1560     /* local sweep */
1561     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1562     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1563     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1564   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1565 
1566   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1567   PetscFunctionReturn(0);
1568 }
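
/*
   Illustrative usage only (a minimal sketch): calling the routine above through the
   public MatSOR() interface. Assumes an assembled MPIAIJ matrix A and conforming
   vectors b and x; error checking elided. In practice this path is usually reached
   via PCSOR (-pc_type sor).

      PetscReal omega = 1.0;                    relaxation parameter
      MatSOR(A,b,omega,SOR_LOCAL_SYMMETRIC_SWEEP,0.0,1,1,x);

   Only the SOR_LOCAL_* variants (SOR on each process's diagonal block combined with
   a Jacobi-style update using the off-process part) and SOR_EISENSTAT are supported
   in parallel; a true global sweep errors out above.
*/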
1569 
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatPermute_MPIAIJ"
1572 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1573 {
1574   Mat            aA,aB,Aperm;
1575   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1576   PetscScalar    *aa,*ba;
1577   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1578   PetscSF        rowsf,sf;
1579   IS             parcolp = NULL;
1580   PetscBool      done;
1581   PetscErrorCode ierr;
1582 
1583   PetscFunctionBegin;
1584   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1585   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1586   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1587   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1588 
1589   /* Invert row permutation to find out where my rows should go */
1590   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1591   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1592   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1593   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1594   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1595   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1596 
1597   /* Invert column permutation to find out where my columns should go */
1598   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1599   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1600   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1601   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1602   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1603   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1604   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1605 
1606   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1607   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1608   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1609 
1610   /* Find out where my gcols should go */
1611   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1612   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1613   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1614   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1615   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1616   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1617   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1618   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1619 
1620   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1621   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1622   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1623   for (i=0; i<m; i++) {
1624     PetscInt row = rdest[i],rowner;
1625     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1626     for (j=ai[i]; j<ai[i+1]; j++) {
1627       PetscInt cowner,col = cdest[aj[j]];
1628       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1629       if (rowner == cowner) dnnz[i]++;
1630       else onnz[i]++;
1631     }
1632     for (j=bi[i]; j<bi[i+1]; j++) {
1633       PetscInt cowner,col = gcdest[bj[j]];
1634       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1635       if (rowner == cowner) dnnz[i]++;
1636       else onnz[i]++;
1637     }
1638   }
1639   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1640   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1641   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1642   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1643   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1644 
1645   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1646   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1647   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1648   for (i=0; i<m; i++) {
1649     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1650     PetscInt j0,rowlen;
1651     rowlen = ai[i+1] - ai[i];
1652     for (j0=j=0; j<rowlen; j0=j) { /* the work arrays hold only m entries, so rows longer than m are inserted in batches */
1653       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1654       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1655     }
1656     rowlen = bi[i+1] - bi[i];
1657     for (j0=j=0; j<rowlen; j0=j) {
1658       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1659       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1660     }
1661   }
1662   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1663   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1664   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1665   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1666   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1667   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1668   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1669   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1670   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1671   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1672   *B = Aperm;
1673   PetscFunctionReturn(0);
1674 }
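
/*
   Illustrative usage only (a minimal sketch): calling MatPermute() on a parallel AIJ
   matrix. The index sets list, for each local row/column, the global position it
   moves to; here an identity permutation is built with ISCreateStride(). Assumes an
   assembled matrix A; error checking elided.

      IS       rowp,colp;
      Mat      B;
      PetscInt m,n,rstart,cstart;

      MatGetLocalSize(A,&m,&n);
      MatGetOwnershipRange(A,&rstart,NULL);
      MatGetOwnershipRangeColumn(A,&cstart,NULL);
      ISCreateStride(PetscObjectComm((PetscObject)A),m,rstart,1,&rowp);
      ISCreateStride(PetscObjectComm((PetscObject)A),n,cstart,1,&colp);
      MatPermute(A,rowp,colp,&B);               B equals A for this identity permutation
      ISDestroy(&rowp); ISDestroy(&colp); MatDestroy(&B);
*/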
1675 
1676 #undef __FUNCT__
1677 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1678 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1679 {
1680   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1681   Mat            A    = mat->A,B = mat->B;
1682   PetscErrorCode ierr;
1683   PetscReal      isend[5],irecv[5];
1684 
1685   PetscFunctionBegin;
1686   info->block_size = 1.0;
1687   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1688 
1689   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1690   isend[3] = info->memory;  isend[4] = info->mallocs;
1691 
1692   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1693 
1694   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1695   isend[3] += info->memory;  isend[4] += info->mallocs;
1696   if (flag == MAT_LOCAL) {
1697     info->nz_used      = isend[0];
1698     info->nz_allocated = isend[1];
1699     info->nz_unneeded  = isend[2];
1700     info->memory       = isend[3];
1701     info->mallocs      = isend[4];
1702   } else if (flag == MAT_GLOBAL_MAX) {
1703     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1704 
1705     info->nz_used      = irecv[0];
1706     info->nz_allocated = irecv[1];
1707     info->nz_unneeded  = irecv[2];
1708     info->memory       = irecv[3];
1709     info->mallocs      = irecv[4];
1710   } else if (flag == MAT_GLOBAL_SUM) {
1711     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1712 
1713     info->nz_used      = irecv[0];
1714     info->nz_allocated = irecv[1];
1715     info->nz_unneeded  = irecv[2];
1716     info->memory       = irecv[3];
1717     info->mallocs      = irecv[4];
1718   }
1719   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1720   info->fill_ratio_needed = 0;
1721   info->factor_mallocs    = 0;
1722   PetscFunctionReturn(0);
1723 }
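
/*
   Illustrative usage only (a minimal sketch): querying the statistics assembled
   above. MAT_LOCAL reports this process's counts; MAT_GLOBAL_SUM and MAT_GLOBAL_MAX
   correspond to the MPI_Allreduce() reductions performed above. Error checking
   elided.

      MatInfo info;

      MatGetInfo(A,MAT_GLOBAL_SUM,&info);
      PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);
*/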
1724 
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatSetOption_MPIAIJ"
1727 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1728 {
1729   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1730   PetscErrorCode ierr;
1731 
1732   PetscFunctionBegin;
1733   switch (op) {
1734   case MAT_NEW_NONZERO_LOCATIONS:
1735   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1736   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1737   case MAT_KEEP_NONZERO_PATTERN:
1738   case MAT_NEW_NONZERO_LOCATION_ERR:
1739   case MAT_USE_INODES:
1740   case MAT_IGNORE_ZERO_ENTRIES:
1741     MatCheckPreallocated(A,1);
1742     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1743     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1744     break;
1745   case MAT_ROW_ORIENTED:
1746     a->roworiented = flg;
1747 
1748     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1749     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1750     break;
1751   case MAT_NEW_DIAGONALS:
1752     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1753     break;
1754   case MAT_IGNORE_OFF_PROC_ENTRIES:
1755     a->donotstash = flg;
1756     break;
1757   case MAT_SPD:
1758     A->spd_set = PETSC_TRUE;
1759     A->spd     = flg;
1760     if (flg) {
1761       A->symmetric                  = PETSC_TRUE;
1762       A->structurally_symmetric     = PETSC_TRUE;
1763       A->symmetric_set              = PETSC_TRUE;
1764       A->structurally_symmetric_set = PETSC_TRUE;
1765     }
1766     break;
1767   case MAT_SYMMETRIC:
1768     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1769     break;
1770   case MAT_STRUCTURALLY_SYMMETRIC:
1771     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1772     break;
1773   case MAT_HERMITIAN:
1774     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1775     break;
1776   case MAT_SYMMETRY_ETERNAL:
1777     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1778     break;
1779   default:
1780     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1781   }
1782   PetscFunctionReturn(0);
1783 }
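
/*
   Illustrative usage only (a minimal sketch): options handled by the routine above
   are mostly forwarded to both sequential blocks a->A and a->B. To take effect they
   must be set before values are inserted. Error checking elided.

      MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);    do not stash entries owned by other ranks
      MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE); error if preallocation is exceeded
*/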
1784 
1785 #undef __FUNCT__
1786 #define __FUNCT__ "MatGetRow_MPIAIJ"
1787 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1788 {
1789   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1790   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1791   PetscErrorCode ierr;
1792   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1793   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1794   PetscInt       *cmap,*idx_p;
1795 
1796   PetscFunctionBegin;
1797   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1798   mat->getrowactive = PETSC_TRUE;
1799 
1800   if (!mat->rowvalues && (idx || v)) {
1801     /*
1802         allocate enough space to hold information from the longest row.
1803     */
1804     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1805     PetscInt   max = 1,tmp;
1806     for (i=0; i<matin->rmap->n; i++) {
1807       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1808       if (max < tmp) max = tmp;
1809     }
1810     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1811   }
1812 
1813   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1814   lrow = row - rstart;
1815 
1816   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1817   if (!v)   {pvA = 0; pvB = 0;}
1818   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1819   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1820   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1821   nztot = nzA + nzB;
1822 
1823   cmap = mat->garray;
1824   if (v  || idx) {
1825     if (nztot) {
1826       /* Sort by increasing column numbers, assuming A and B already sorted */
1827       PetscInt imark = -1;
1828       if (v) {
1829         *v = v_p = mat->rowvalues;
1830         for (i=0; i<nzB; i++) {
1831           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1832           else break;
1833         }
1834         imark = i;
1835         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1836         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1837       }
1838       if (idx) {
1839         *idx = idx_p = mat->rowindices;
1840         if (imark > -1) {
1841           for (i=0; i<imark; i++) {
1842             idx_p[i] = cmap[cworkB[i]];
1843           }
1844         } else {
1845           for (i=0; i<nzB; i++) {
1846             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1847             else break;
1848           }
1849           imark = i;
1850         }
1851         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1852         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1853       }
1854     } else {
1855       if (idx) *idx = 0;
1856       if (v)   *v   = 0;
1857     }
1858   }
1859   *nz  = nztot;
1860   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1861   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1862   PetscFunctionReturn(0);
1863 }
1864 
1865 #undef __FUNCT__
1866 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1867 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1868 {
1869   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1870 
1871   PetscFunctionBegin;
1872   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1873   aij->getrowactive = PETSC_FALSE;
1874   PetscFunctionReturn(0);
1875 }
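
/*
   Illustrative usage only (a minimal sketch): the intended MatGetRow()/MatRestoreRow()
   pairing for the routines above. Only locally owned rows may be requested, and only
   one row may be active at a time (hence the getrowactive flag). Error checking
   elided.

      PetscInt          row,rstart,rend,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;

      MatGetOwnershipRange(A,&rstart,&rend);
      for (row=rstart; row<rend; row++) {
        MatGetRow(A,row,&ncols,&cols,&vals);
        ... use global column indices cols[0..ncols-1] and values vals[] ...
        MatRestoreRow(A,row,&ncols,&cols,&vals);
      }
*/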
1876 
1877 #undef __FUNCT__
1878 #define __FUNCT__ "MatNorm_MPIAIJ"
1879 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1880 {
1881   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1882   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1883   PetscErrorCode ierr;
1884   PetscInt       i,j,cstart = mat->cmap->rstart;
1885   PetscReal      sum = 0.0;
1886   MatScalar      *v;
1887 
1888   PetscFunctionBegin;
1889   if (aij->size == 1) {
1890     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1891   } else {
1892     if (type == NORM_FROBENIUS) {
1893       v = amat->a;
1894       for (i=0; i<amat->nz; i++) {
1895         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1896       }
1897       v = bmat->a;
1898       for (i=0; i<bmat->nz; i++) {
1899         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1900       }
1901       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1902       *norm = PetscSqrtReal(*norm);
1903     } else if (type == NORM_1) { /* max column norm */
1904       PetscReal *tmp,*tmp2;
1905       PetscInt  *jj,*garray = aij->garray;
1906       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1907       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1908       *norm = 0.0;
1909       v     = amat->a; jj = amat->j;
1910       for (j=0; j<amat->nz; j++) {
1911         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1912       }
1913       v = bmat->a; jj = bmat->j;
1914       for (j=0; j<bmat->nz; j++) {
1915         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1916       }
1917       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1918       for (j=0; j<mat->cmap->N; j++) {
1919         if (tmp2[j] > *norm) *norm = tmp2[j];
1920       }
1921       ierr = PetscFree(tmp);CHKERRQ(ierr);
1922       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1923     } else if (type == NORM_INFINITY) { /* max row norm */
1924       PetscReal ntemp = 0.0;
1925       for (j=0; j<aij->A->rmap->n; j++) {
1926         v   = amat->a + amat->i[j];
1927         sum = 0.0;
1928         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1929           sum += PetscAbsScalar(*v); v++;
1930         }
1931         v = bmat->a + bmat->i[j];
1932         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1933           sum += PetscAbsScalar(*v); v++;
1934         }
1935         if (sum > ntemp) ntemp = sum;
1936       }
1937       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1938     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1939   }
1940   PetscFunctionReturn(0);
1941 }
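
/*
   For reference, with a_ij the global entries, the cases above compute
      NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2)  local sums of squares, then MPIU_SUM reduction
      NORM_1:         max_j sum_i |a_ij|     column sums reduced with MPIU_SUM
      NORM_INFINITY:  max_i sum_j |a_ij|     per-row sums (rows are process-local), MPIU_MAX

   Illustrative usage only (a minimal sketch, error checking elided):

      PetscReal nrm;
      MatNorm(A,NORM_FROBENIUS,&nrm);
*/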
1942 
1943 #undef __FUNCT__
1944 #define __FUNCT__ "MatTranspose_MPIAIJ"
1945 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1946 {
1947   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1948   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1949   PetscErrorCode ierr;
1950   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1951   PetscInt       cstart = A->cmap->rstart,ncol;
1952   Mat            B;
1953   MatScalar      *array;
1954 
1955   PetscFunctionBegin;
1956   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1957 
1958   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1959   ai = Aloc->i; aj = Aloc->j;
1960   bi = Bloc->i; bj = Bloc->j;
1961   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1962     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1963     PetscSFNode          *oloc;
1964     PETSC_UNUSED PetscSF sf;
1965 
1966     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1967     /* compute d_nnz for preallocation */
1968     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1969     for (i=0; i<ai[ma]; i++) {
1970       d_nnz[aj[i]]++;
1971       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1972     }
1973     /* compute local off-diagonal contributions */
1974     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1975     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1976     /* map those to global */
1977     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1978     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1979     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1980     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1981     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1982     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1983     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1984 
1985     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1986     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1987     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1988     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1989     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1990     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1991   } else {
1992     B    = *matout;
1993     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1994     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1995   }
1996 
1997   /* copy over the A part */
1998   array = Aloc->a;
1999   row   = A->rmap->rstart;
2000   for (i=0; i<ma; i++) {
2001     ncol = ai[i+1]-ai[i];
2002     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2003     row++;
2004     array += ncol; aj += ncol;
2005   }
2006   aj = Aloc->j;
2007   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2008 
2009   /* copy over the B part */
2010   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2011   array = Bloc->a;
2012   row   = A->rmap->rstart;
2013   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2014   cols_tmp = cols;
2015   for (i=0; i<mb; i++) {
2016     ncol = bi[i+1]-bi[i];
2017     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2018     row++;
2019     array += ncol; cols_tmp += ncol;
2020   }
2021   ierr = PetscFree(cols);CHKERRQ(ierr);
2022 
2023   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2024   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2025   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2026     *matout = B;
2027   } else {
2028     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2029   }
2030   PetscFunctionReturn(0);
2031 }
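
/*
   Illustrative usage only (a minimal sketch): the two reuse modes handled above.
   Error checking elided.

      Mat At;
      MatTranspose(A,MAT_INITIAL_MATRIX,&At);   allocates and fills a new matrix
      ... change the numerical values of A, keeping the same nonzero pattern ...
      MatTranspose(A,MAT_REUSE_MATRIX,&At);     refills the existing At
      MatDestroy(&At);

   The in-place form MatTranspose(A,MAT_REUSE_MATRIX,&A) is only supported for
   square matrices, as checked at the top of the routine.
*/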
2032 
2033 #undef __FUNCT__
2034 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2035 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2036 {
2037   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2038   Mat            a    = aij->A,b = aij->B;
2039   PetscErrorCode ierr;
2040   PetscInt       s1,s2,s3;
2041 
2042   PetscFunctionBegin;
2043   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2044   if (rr) {
2045     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2046     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2047     /* Overlap communication with computation. */
2048     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2049   }
2050   if (ll) {
2051     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2052     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2053     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2054   }
2055   /* scale the diagonal block */
2056   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2057 
2058   if (rr) {
2059     /* Do a scatter end and then right scale the off-diagonal block */
2060     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2061     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2062   }
2063   PetscFunctionReturn(0);
2064 }
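
/*
   Illustrative usage only (a minimal sketch): computing diag(l)*A*diag(r) with the
   routine above, where l conforms to the rows and r to the columns of A. Error
   checking elided.

      Vec l,r;

      MatGetVecs(A,&r,&l);                      r: column layout, l: row layout
      VecSet(l,2.0);
      VecSet(r,0.5);
      MatDiagonalScale(A,l,r);                  A <- diag(l) * A * diag(r)
      VecDestroy(&l); VecDestroy(&r);
*/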
2065 
2066 #undef __FUNCT__
2067 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2068 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2069 {
2070   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2071   PetscErrorCode ierr;
2072 
2073   PetscFunctionBegin;
2074   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatEqual_MPIAIJ"
2080 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2081 {
2082   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2083   Mat            a,b,c,d;
2084   PetscBool      flg;
2085   PetscErrorCode ierr;
2086 
2087   PetscFunctionBegin;
2088   a = matA->A; b = matA->B;
2089   c = matB->A; d = matB->B;
2090 
2091   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2092   if (flg) {
2093     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2094   }
2095   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 #undef __FUNCT__
2100 #define __FUNCT__ "MatCopy_MPIAIJ"
2101 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2102 {
2103   PetscErrorCode ierr;
2104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2105   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2106 
2107   PetscFunctionBegin;
2108   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2109   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2110     /* Because of the column compression in the off-process part of the matrix a->B,
2111        the number of columns in a->B and b->B may differ, hence we cannot call
2112        MatCopy() directly on the two parts. If need be, a copy more efficient than
2113        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2114        and then copying the submatrices */
2115     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2116   } else {
2117     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2118     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2119   }
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 #undef __FUNCT__
2124 #define __FUNCT__ "MatSetUp_MPIAIJ"
2125 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2126 {
2127   PetscErrorCode ierr;
2128 
2129   PetscFunctionBegin;
2130   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2136 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2137 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2138 {
2139   PetscInt       i,m=Y->rmap->N;
2140   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2141   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2142   const PetscInt *xi = x->i,*yi = y->i;
2143 
2144   PetscFunctionBegin;
2145   /* Set the number of nonzeros in the new matrix */
2146   for (i=0; i<m; i++) {
2147     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2148     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2149     nnz[i] = 0;
2150     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2151       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2152       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2153       nnz[i]++;
2154     }
2155     for (; k<nzy; k++) nnz[i]++;
2156   }
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 #undef __FUNCT__
2161 #define __FUNCT__ "MatAXPY_MPIAIJ"
2162 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2163 {
2164   PetscErrorCode ierr;
2165   PetscInt       i;
2166   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2167   PetscBLASInt   bnz,one=1;
2168   Mat_SeqAIJ     *x,*y;
2169 
2170   PetscFunctionBegin;
2171   if (str == SAME_NONZERO_PATTERN) {
2172     PetscScalar alpha = a;
2173     x    = (Mat_SeqAIJ*)xx->A->data;
2174     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2175     y    = (Mat_SeqAIJ*)yy->A->data;
2176     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2177     x    = (Mat_SeqAIJ*)xx->B->data;
2178     y    = (Mat_SeqAIJ*)yy->B->data;
2179     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2180     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2181     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2182   } else if (str == SUBSET_NONZERO_PATTERN) {
2183     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2184 
2185     x = (Mat_SeqAIJ*)xx->B->data;
2186     y = (Mat_SeqAIJ*)yy->B->data;
2187     if (y->xtoy && y->XtoY != xx->B) {
2188       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2189       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2190     }
2191     if (!y->xtoy) { /* get xtoy */
2192       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2193       y->XtoY = xx->B;
2194       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2195     }
2196     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2197     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2198   } else {
2199     Mat      B;
2200     PetscInt *nnz_d,*nnz_o;
2201     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2202     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2203     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2204     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2205     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2206     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2207     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2208     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2209     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2210     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2211     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2212     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2213     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2214     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2215   }
2216   PetscFunctionReturn(0);
2217 }
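
/*
   Illustrative usage only (a minimal sketch): the three cases handled above, i.e.
   Y <- Y + a*X. The MatStructure argument is the caller's promise about how the
   nonzero patterns are related; the stronger the promise, the cheaper the update.
   Error checking elided.

      MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);       plain BLAS axpy on the value arrays
      MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);     X's pattern contained in Y's
      MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);  Y is reallocated with the merged pattern
*/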
2218 
2219 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatConjugate_MPIAIJ"
2223 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2224 {
2225 #if defined(PETSC_USE_COMPLEX)
2226   PetscErrorCode ierr;
2227   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2228 
2229   PetscFunctionBegin;
2230   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2231   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2232 #else
2233   PetscFunctionBegin;
2234 #endif
2235   PetscFunctionReturn(0);
2236 }
2237 
2238 #undef __FUNCT__
2239 #define __FUNCT__ "MatRealPart_MPIAIJ"
2240 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2241 {
2242   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2243   PetscErrorCode ierr;
2244 
2245   PetscFunctionBegin;
2246   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2247   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2248   PetscFunctionReturn(0);
2249 }
2250 
2251 #undef __FUNCT__
2252 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2253 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2254 {
2255   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2256   PetscErrorCode ierr;
2257 
2258   PetscFunctionBegin;
2259   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2260   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 #if defined(PETSC_HAVE_PBGL)
2265 
2266 #include <boost/parallel/mpi/bsp_process_group.hpp>
2267 #include <boost/graph/distributed/ilu_default_graph.hpp>
2268 #include <boost/graph/distributed/ilu_0_block.hpp>
2269 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2270 #include <boost/graph/distributed/petsc/interface.hpp>
2271 #include <boost/multi_array.hpp>
2272 #include <boost/parallel/distributed_property_map.hpp>
2273 
2274 #undef __FUNCT__
2275 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2276 /*
2277   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2278 */
2279 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2280 {
2281   namespace petsc = boost::distributed::petsc;
2282 
2283   namespace graph_dist = boost::graph::distributed;
2284   using boost::graph::distributed::ilu_default::process_group_type;
2285   using boost::graph::ilu_permuted;
2286 
2287   PetscBool      row_identity, col_identity;
2288   PetscContainer c;
2289   PetscInt       m, n, M, N;
2290   PetscErrorCode ierr;
2291 
2292   PetscFunctionBegin;
2293   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2294   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2295   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2296   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2297 
2298   process_group_type pg;
2299   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2300   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2301   lgraph_type& level_graph = *lgraph_p;
2302   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2303 
2304   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2305   ilu_permuted(level_graph);
2306 
2307   /* put together the new matrix */
2308   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2309   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2310   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2311   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2312   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2313   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2314   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2315   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2316 
2317   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2318   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2319   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2320   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2321   PetscFunctionReturn(0);
2322 }
2323 
2324 #undef __FUNCT__
2325 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2326 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2327 {
2328   PetscFunctionBegin;
2329   PetscFunctionReturn(0);
2330 }
2331 
2332 #undef __FUNCT__
2333 #define __FUNCT__ "MatSolve_MPIAIJ"
2334 /*
2335   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2336 */
2337 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2338 {
2339   namespace graph_dist = boost::graph::distributed;
2340 
2341   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2342   lgraph_type    *lgraph_p;
2343   PetscContainer c;
2344   PetscErrorCode ierr;
2345 
2346   PetscFunctionBegin;
2347   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2348   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2349   ierr = VecCopy(b, x);CHKERRQ(ierr);
2350 
2351   PetscScalar *array_x;
2352   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2353   PetscInt sx;
2354   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2355 
2356   PetscScalar *array_b;
2357   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2358   PetscInt sb;
2359   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2360 
2361   lgraph_type& level_graph = *lgraph_p;
2362   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2363 
2364   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2365   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2366   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2367 
2368   typedef boost::iterator_property_map<array_ref_type::iterator,
2369                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2370   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2371   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2372 
2373   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2374   PetscFunctionReturn(0);
2375 }
2376 #endif
2377 
2378 
2379 #undef __FUNCT__
2380 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2381 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2382 {
2383   PetscMPIInt    rank,size;
2384   MPI_Comm       comm;
2385   PetscErrorCode ierr;
2386   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2387   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2388   PetscInt       *rowrange = mat->rmap->range;
2389   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2390   Mat            A = aij->A,B=aij->B,C=*matredundant;
2391   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2392   PetscScalar    *sbuf_a;
2393   PetscInt       nzlocal=a->nz+b->nz;
2394   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2395   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2396   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2397   MatScalar      *aworkA,*aworkB;
2398   PetscScalar    *vals;
2399   PetscMPIInt    tag1,tag2,tag3,imdex;
2400   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2401   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2402   MPI_Status     recv_status,*send_status;
2403   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2404   PetscInt       **rbuf_j=NULL;
2405   PetscScalar    **rbuf_a=NULL;
2406   Mat_Redundant  *redund =NULL;
2407 
2408   PetscFunctionBegin;
2409   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2410   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2411   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2412   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2413   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2414 
2415   if (reuse == MAT_REUSE_MATRIX) {
2416     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2417     if (subsize == 1) {
2418       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2419       redund = c->redundant;
2420     } else {
2421       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2422       redund = c->redundant;
2423     }
2424     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2425 
2426     nsends    = redund->nsends;
2427     nrecvs    = redund->nrecvs;
2428     send_rank = redund->send_rank;
2429     recv_rank = redund->recv_rank;
2430     sbuf_nz   = redund->sbuf_nz;
2431     rbuf_nz   = redund->rbuf_nz;
2432     sbuf_j    = redund->sbuf_j;
2433     sbuf_a    = redund->sbuf_a;
2434     rbuf_j    = redund->rbuf_j;
2435     rbuf_a    = redund->rbuf_a;
2436   }
2437 
2438   if (reuse == MAT_INITIAL_MATRIX) {
2439     PetscInt    nleftover,np_subcomm;
2440 
2441     /* get the destination processors' id send_rank, nsends and nrecvs */
2442     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2443 
2444     np_subcomm = size/nsubcomm;
2445     nleftover  = size - nsubcomm*np_subcomm;
2446 
2447     /* the block of code below is specific to the INTERLACED layout */
2448     /* ------------------------------------------------*/
2449     nsends = 0; nrecvs = 0;
2450     for (i=0; i<size; i++) {
2451       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2452         send_rank[nsends++] = i;
2453         recv_rank[nrecvs++] = i;
2454       }
2455     }
2456     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2457       i = size-nleftover-1;
2458       j = 0;
2459       while (j < nsubcomm - nleftover) {
2460         send_rank[nsends++] = i;
2461         i--; j++;
2462       }
2463     }
2464 
2465     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2466       for (i=0; i<nleftover; i++) {
2467         recv_rank[nrecvs++] = size-nleftover+i;
2468       }
2469     }
2470     /*----------------------------------------------*/
2471 
2472     /* allocate sbuf_j, sbuf_a */
2473     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2474     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2475     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2476     /*
2477     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2478     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2479      */
2480   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2481 
2482   /* copy mat's local entries into the buffers */
2483   if (reuse == MAT_INITIAL_MATRIX) {
2484     rownz_max = 0;
2485     rptr      = sbuf_j;
2486     cols      = sbuf_j + rend-rstart + 1;
2487     vals      = sbuf_a;
2488     rptr[0]   = 0;
2489     for (i=0; i<rend-rstart; i++) {
2490       row    = i + rstart;
2491       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2492       ncols  = nzA + nzB;
2493       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2494       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2495       /* load the column indices for this row into cols */
2496       lwrite = 0;
2497       for (l=0; l<nzB; l++) {
2498         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2499           vals[lwrite]   = aworkB[l];
2500           cols[lwrite++] = ctmp;
2501         }
2502       }
2503       for (l=0; l<nzA; l++) {
2504         vals[lwrite]   = aworkA[l];
2505         cols[lwrite++] = cstart + cworkA[l];
2506       }
2507       for (l=0; l<nzB; l++) {
2508         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2509           vals[lwrite]   = aworkB[l];
2510           cols[lwrite++] = ctmp;
2511         }
2512       }
2513       vals     += ncols;
2514       cols     += ncols;
2515       rptr[i+1] = rptr[i] + ncols;
2516       if (rownz_max < ncols) rownz_max = ncols;
2517     }
2518     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2519   } else { /* only copy matrix values into sbuf_a */
2520     rptr    = sbuf_j;
2521     vals    = sbuf_a;
2522     rptr[0] = 0;
2523     for (i=0; i<rend-rstart; i++) {
2524       row    = i + rstart;
2525       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2526       ncols  = nzA + nzB;
2527       cworkB = b->j + b->i[i];
2528       aworkA = a->a + a->i[i];
2529       aworkB = b->a + b->i[i];
2530       lwrite = 0;
2531       for (l=0; l<nzB; l++) {
2532         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2533       }
2534       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2535       for (l=0; l<nzB; l++) {
2536         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2537       }
2538       vals     += ncols;
2539       rptr[i+1] = rptr[i] + ncols;
2540     }
2541   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2542 
2543   /* send nzlocal to others, and recv other's nzlocal */
2544   /*--------------------------------------------------*/
2545   if (reuse == MAT_INITIAL_MATRIX) {
2546     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2547 
2548     s_waits2 = s_waits3 + nsends;
2549     s_waits1 = s_waits2 + nsends;
2550     r_waits1 = s_waits1 + nsends;
2551     r_waits2 = r_waits1 + nrecvs;
2552     r_waits3 = r_waits2 + nrecvs;
2553   } else {
2554     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2555 
2556     r_waits3 = s_waits3 + nsends;
2557   }
2558 
2559   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2560   if (reuse == MAT_INITIAL_MATRIX) {
2561     /* get new tags to keep the communication clean */
2562     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2563     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2564     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2565 
2566     /* post receives of other's nzlocal */
2567     for (i=0; i<nrecvs; i++) {
2568       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2569     }
2570     /* send nzlocal to others */
2571     for (i=0; i<nsends; i++) {
2572       sbuf_nz[i] = nzlocal;
2573       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2574     }
2575     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2576     count = nrecvs;
2577     while (count) {
2578       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2579 
2580       recv_rank[imdex] = recv_status.MPI_SOURCE;
2581       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2582       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2583 
2584       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2585 
2586       rbuf_nz[imdex] += i + 2;
2587 
2588       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2589       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2590       count--;
2591     }
2592     /* wait on sends of nzlocal */
2593     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2594     /* send mat->i,j to others, and recv from others */
2595     /*------------------------------------------------*/
2596     for (i=0; i<nsends; i++) {
2597       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2598       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2599     }
2600     /* wait on receives of mat->i,j */
2601     /*------------------------------*/
2602     count = nrecvs;
2603     while (count) {
2604       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2605       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2606       count--;
2607     }
2608     /* wait on sends of mat->i,j */
2609     /*---------------------------*/
2610     if (nsends) {
2611       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2612     }
2613   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2614 
2615   /* post receives, send and receive mat->a */
2616   /*----------------------------------------*/
2617   for (imdex=0; imdex<nrecvs; imdex++) {
2618     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2619   }
2620   for (i=0; i<nsends; i++) {
2621     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2622   }
2623   count = nrecvs;
2624   while (count) {
2625     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2626     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2627     count--;
2628   }
2629   if (nsends) {
2630     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2631   }
2632 
2633   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2634 
2635   /* create redundant matrix */
2636   /*-------------------------*/
2637   if (reuse == MAT_INITIAL_MATRIX) {
2638     const PetscInt *range;
2639     PetscInt       rstart_sub,rend_sub,mloc_sub;
2640 
2641     /* compute rownz_max for preallocation */
2642     for (imdex=0; imdex<nrecvs; imdex++) {
2643       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2644       rptr = rbuf_j[imdex];
2645       for (i=0; i<j; i++) {
2646         ncols = rptr[i+1] - rptr[i];
2647         if (rownz_max < ncols) rownz_max = ncols;
2648       }
2649     }
2650 
2651     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2652 
2653     /* get local size of redundant matrix
2654        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2655     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2656     rstart_sub = range[nsubcomm*subrank];
2657     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2658       rend_sub = range[nsubcomm*(subrank+1)];
2659     } else {
2660       rend_sub = mat->rmap->N;
2661     }
2662     mloc_sub = rend_sub - rstart_sub;
2663 
2664     if (M == N) {
2665       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2666     } else { /* non-square matrix */
2667       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2668     }
2669     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2670     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2671     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2672     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2673   } else {
2674     C = *matredundant;
2675   }
2676 
2677   /* insert local matrix entries */
2678   rptr = sbuf_j;
2679   cols = sbuf_j + rend-rstart + 1;
2680   vals = sbuf_a;
2681   for (i=0; i<rend-rstart; i++) {
2682     row   = i + rstart;
2683     ncols = rptr[i+1] - rptr[i];
2684     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2685     vals += ncols;
2686     cols += ncols;
2687   }
2688   /* insert received matrix entries */
2689   for (imdex=0; imdex<nrecvs; imdex++) {
2690     rstart = rowrange[recv_rank[imdex]];
2691     rend   = rowrange[recv_rank[imdex]+1];
2692     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2693     rptr   = rbuf_j[imdex];
2694     cols   = rbuf_j[imdex] + rend-rstart + 1;
2695     vals   = rbuf_a[imdex];
2696     for (i=0; i<rend-rstart; i++) {
2697       row   = i + rstart;
2698       ncols = rptr[i+1] - rptr[i];
2699       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2700       vals += ncols;
2701       cols += ncols;
2702     }
2703   }
2704   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2705   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2706 
2707   if (reuse == MAT_INITIAL_MATRIX) {
2708     *matredundant = C;
2709 
2710     /* create a supporting struct and attach it to C for reuse */
2711     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2712     if (subsize == 1) {
2713       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2714       c->redundant = redund;
2715     } else {
2716       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2717       c->redundant = redund;
2718     }
2719 
2720     redund->nzlocal   = nzlocal;
2721     redund->nsends    = nsends;
2722     redund->nrecvs    = nrecvs;
2723     redund->send_rank = send_rank;
2724     redund->recv_rank = recv_rank;
2725     redund->sbuf_nz   = sbuf_nz;
2726     redund->rbuf_nz   = rbuf_nz;
2727     redund->sbuf_j    = sbuf_j;
2728     redund->sbuf_a    = sbuf_a;
2729     redund->rbuf_j    = rbuf_j;
2730     redund->rbuf_a    = rbuf_a;
2731     redund->psubcomm  = NULL;
2732   }
2733   PetscFunctionReturn(0);
2734 }
2735 
2736 #undef __FUNCT__
2737 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
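/*
   Developer note: when the caller passes subcomm == MPI_COMM_NULL, a PetscSubcomm is
   created (or retrieved from the reused matrix); the PETSC_SUBCOMM_INTERLACED case is
   delegated to MatGetRedundantMatrix_MPIAIJ_interlaced(), while all other cases build
   the redundant matrix via MatGetSubMatrices() followed by
   MatCreateMPIAIJConcatenateSeqAIJ().
*/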
2738 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2739 {
2740   PetscErrorCode ierr;
2741   MPI_Comm       comm;
2742   PetscMPIInt    size,subsize;
2743   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2744   Mat_Redundant  *redund=NULL;
2745   PetscSubcomm   psubcomm=NULL;
2746   MPI_Comm       subcomm_in=subcomm;
2747   Mat            *matseq;
2748   IS             isrow,iscol;
2749 
2750   PetscFunctionBegin;
2751   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2752     if (reuse ==  MAT_INITIAL_MATRIX) {
2753       /* create psubcomm, then get subcomm */
2754       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2755       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2756       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2757 
2758       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2759       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2760       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2761       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2762       subcomm = psubcomm->comm;
2763     } else { /* retrieve psubcomm and subcomm */
2764       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2765       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2766       if (subsize == 1) {
2767         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2768         redund = c->redundant;
2769       } else {
2770         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2771         redund = c->redundant;
2772       }
2773       psubcomm = redund->psubcomm;
2774     }
2775     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2776       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2777       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
2778         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2779         if (subsize == 1) {
2780           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2781           c->redundant->psubcomm = psubcomm;
2782         } else {
2783           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2784           c->redundant->psubcomm = psubcomm;
2785         }
2786       }
2787       PetscFunctionReturn(0);
2788     }
2789   }
2790 
2791   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2792   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2793   if (reuse == MAT_INITIAL_MATRIX) {
2794     /* create a local sequential matrix matseq[0] */
2795     mloc_sub = PETSC_DECIDE;
2796     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2797     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2798     rstart = rend - mloc_sub;
2799     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2800     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2801   } else { /* reuse == MAT_REUSE_MATRIX */
2802     if (subsize == 1) {
2803       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2804       redund = c->redundant;
2805     } else {
2806       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2807       redund = c->redundant;
2808     }
2809 
2810     isrow  = redund->isrow;
2811     iscol  = redund->iscol;
2812     matseq = redund->matseq;
2813   }
2814   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2815   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2816 
2817   if (reuse == MAT_INITIAL_MATRIX) {
2818     /* create a supporting struct and attach it to C for reuse */
2819     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2820     if (subsize == 1) {
2821       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2822       c->redundant = redund;
2823     } else {
2824       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2825       c->redundant = redund;
2826     }
2827     redund->isrow    = isrow;
2828     redund->iscol    = iscol;
2829     redund->matseq   = matseq;
2830     redund->psubcomm = psubcomm;
2831   }
2832   PetscFunctionReturn(0);
2833 }
2834 
2835 #undef __FUNCT__
2836 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
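/*
   Computes the row-wise maximum in absolute value by taking, for each local row, the
   larger of the results from the diagonal block a->A and the off-diagonal block a->B;
   a->garray maps the off-diagonal block's local column indices back to global indices.
*/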
2837 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2838 {
2839   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2840   PetscErrorCode ierr;
2841   PetscInt       i,*idxb = 0;
2842   PetscScalar    *va,*vb;
2843   Vec            vtmp;
2844 
2845   PetscFunctionBegin;
2846   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2847   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2848   if (idx) {
2849     for (i=0; i<A->rmap->n; i++) {
2850       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2851     }
2852   }
2853 
2854   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2855   if (idx) {
2856     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2857   }
2858   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2859   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2860 
2861   for (i=0; i<A->rmap->n; i++) {
2862     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2863       va[i] = vb[i];
2864       if (idx) idx[i] = a->garray[idxb[i]];
2865     }
2866   }
2867 
2868   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2869   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2870   ierr = PetscFree(idxb);CHKERRQ(ierr);
2871   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2872   PetscFunctionReturn(0);
2873 }
2874 
2875 #undef __FUNCT__
2876 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2877 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2878 {
2879   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2880   PetscErrorCode ierr;
2881   PetscInt       i,*idxb = 0;
2882   PetscScalar    *va,*vb;
2883   Vec            vtmp;
2884 
2885   PetscFunctionBegin;
2886   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2887   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2888   if (idx) {
2889     for (i=0; i<A->rmap->n; i++) {
2890       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2891     }
2892   }
2893 
2894   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2895   if (idx) {
2896     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2897   }
2898   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2899   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2900 
2901   for (i=0; i<A->rmap->n; i++) {
2902     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2903       va[i] = vb[i];
2904       if (idx) idx[i] = a->garray[idxb[i]];
2905     }
2906   }
2907 
2908   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2909   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2910   ierr = PetscFree(idxb);CHKERRQ(ierr);
2911   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2912   PetscFunctionReturn(0);
2913 }
2914 
2915 #undef __FUNCT__
2916 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2917 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2918 {
2919   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2920   PetscInt       n      = A->rmap->n;
2921   PetscInt       cstart = A->cmap->rstart;
2922   PetscInt       *cmap  = mat->garray;
2923   PetscInt       *diagIdx, *offdiagIdx;
2924   Vec            diagV, offdiagV;
2925   PetscScalar    *a, *diagA, *offdiagA;
2926   PetscInt       r;
2927   PetscErrorCode ierr;
2928 
2929   PetscFunctionBegin;
2930   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2931   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2932   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2933   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2934   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2935   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2936   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2937   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2938   for (r = 0; r < n; ++r) {
2939     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2940       a[r]   = diagA[r];
2941       idx[r] = cstart + diagIdx[r];
2942     } else {
2943       a[r]   = offdiagA[r];
2944       idx[r] = cmap[offdiagIdx[r]];
2945     }
2946   }
2947   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2948   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2949   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2950   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2951   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2952   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2953   PetscFunctionReturn(0);
2954 }
2955 
2956 #undef __FUNCT__
2957 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2958 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2959 {
2960   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2961   PetscInt       n      = A->rmap->n;
2962   PetscInt       cstart = A->cmap->rstart;
2963   PetscInt       *cmap  = mat->garray;
2964   PetscInt       *diagIdx, *offdiagIdx;
2965   Vec            diagV, offdiagV;
2966   PetscScalar    *a, *diagA, *offdiagA;
2967   PetscInt       r;
2968   PetscErrorCode ierr;
2969 
2970   PetscFunctionBegin;
2971   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2972   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2973   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2974   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2975   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2976   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2977   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2978   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2979   for (r = 0; r < n; ++r) {
2980     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2981       a[r]   = diagA[r];
2982       idx[r] = cstart + diagIdx[r];
2983     } else {
2984       a[r]   = offdiagA[r];
2985       idx[r] = cmap[offdiagIdx[r]];
2986     }
2987   }
2988   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2989   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2990   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2991   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2992   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2993   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2994   PetscFunctionReturn(0);
2995 }
2996 
2997 #undef __FUNCT__
2998 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2999 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3000 {
3001   PetscErrorCode ierr;
3002   Mat            *dummy;
3003 
3004   PetscFunctionBegin;
3005   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3006   *newmat = *dummy;
3007   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3008   PetscFunctionReturn(0);
3009 }
3010 
3011 #undef __FUNCT__
3012 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3013 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3014 {
3015   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3016   PetscErrorCode ierr;
3017 
3018   PetscFunctionBegin;
3019   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3020   PetscFunctionReturn(0);
3021 }
3022 
3023 #undef __FUNCT__
3024 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3025 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3026 {
3027   PetscErrorCode ierr;
3028   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3029 
3030   PetscFunctionBegin;
3031   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3032   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3033   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3034   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3035   PetscFunctionReturn(0);
3036 }
3037 
3038 /* -------------------------------------------------------------------*/
3039 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3040                                        MatGetRow_MPIAIJ,
3041                                        MatRestoreRow_MPIAIJ,
3042                                        MatMult_MPIAIJ,
3043                                 /* 4*/ MatMultAdd_MPIAIJ,
3044                                        MatMultTranspose_MPIAIJ,
3045                                        MatMultTransposeAdd_MPIAIJ,
3046 #if defined(PETSC_HAVE_PBGL)
3047                                        MatSolve_MPIAIJ,
3048 #else
3049                                        0,
3050 #endif
3051                                        0,
3052                                        0,
3053                                 /*10*/ 0,
3054                                        0,
3055                                        0,
3056                                        MatSOR_MPIAIJ,
3057                                        MatTranspose_MPIAIJ,
3058                                 /*15*/ MatGetInfo_MPIAIJ,
3059                                        MatEqual_MPIAIJ,
3060                                        MatGetDiagonal_MPIAIJ,
3061                                        MatDiagonalScale_MPIAIJ,
3062                                        MatNorm_MPIAIJ,
3063                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3064                                        MatAssemblyEnd_MPIAIJ,
3065                                        MatSetOption_MPIAIJ,
3066                                        MatZeroEntries_MPIAIJ,
3067                                 /*24*/ MatZeroRows_MPIAIJ,
3068                                        0,
3069 #if defined(PETSC_HAVE_PBGL)
3070                                        0,
3071 #else
3072                                        0,
3073 #endif
3074                                        0,
3075                                        0,
3076                                 /*29*/ MatSetUp_MPIAIJ,
3077 #if defined(PETSC_HAVE_PBGL)
3078                                        0,
3079 #else
3080                                        0,
3081 #endif
3082                                        0,
3083                                        0,
3084                                        0,
3085                                 /*34*/ MatDuplicate_MPIAIJ,
3086                                        0,
3087                                        0,
3088                                        0,
3089                                        0,
3090                                 /*39*/ MatAXPY_MPIAIJ,
3091                                        MatGetSubMatrices_MPIAIJ,
3092                                        MatIncreaseOverlap_MPIAIJ,
3093                                        MatGetValues_MPIAIJ,
3094                                        MatCopy_MPIAIJ,
3095                                 /*44*/ MatGetRowMax_MPIAIJ,
3096                                        MatScale_MPIAIJ,
3097                                        0,
3098                                        0,
3099                                        MatZeroRowsColumns_MPIAIJ,
3100                                 /*49*/ MatSetRandom_MPIAIJ,
3101                                        0,
3102                                        0,
3103                                        0,
3104                                        0,
3105                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3106                                        0,
3107                                        MatSetUnfactored_MPIAIJ,
3108                                        MatPermute_MPIAIJ,
3109                                        0,
3110                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3111                                        MatDestroy_MPIAIJ,
3112                                        MatView_MPIAIJ,
3113                                        0,
3114                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3115                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3116                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3117                                        0,
3118                                        0,
3119                                        0,
3120                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3121                                        MatGetRowMinAbs_MPIAIJ,
3122                                        0,
3123                                        MatSetColoring_MPIAIJ,
3124                                        0,
3125                                        MatSetValuesAdifor_MPIAIJ,
3126                                 /*75*/ MatFDColoringApply_AIJ,
3127                                        0,
3128                                        0,
3129                                        0,
3130                                        MatFindZeroDiagonals_MPIAIJ,
3131                                 /*80*/ 0,
3132                                        0,
3133                                        0,
3134                                 /*83*/ MatLoad_MPIAIJ,
3135                                        0,
3136                                        0,
3137                                        0,
3138                                        0,
3139                                        0,
3140                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3141                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3142                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3143                                        MatPtAP_MPIAIJ_MPIAIJ,
3144                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3145                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3146                                        0,
3147                                        0,
3148                                        0,
3149                                        0,
3150                                 /*99*/ 0,
3151                                        0,
3152                                        0,
3153                                        MatConjugate_MPIAIJ,
3154                                        0,
3155                                 /*104*/MatSetValuesRow_MPIAIJ,
3156                                        MatRealPart_MPIAIJ,
3157                                        MatImaginaryPart_MPIAIJ,
3158                                        0,
3159                                        0,
3160                                 /*109*/0,
3161                                        MatGetRedundantMatrix_MPIAIJ,
3162                                        MatGetRowMin_MPIAIJ,
3163                                        0,
3164                                        0,
3165                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3166                                        0,
3167                                        0,
3168                                        0,
3169                                        0,
3170                                 /*119*/0,
3171                                        0,
3172                                        0,
3173                                        0,
3174                                        MatGetMultiProcBlock_MPIAIJ,
3175                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3176                                        MatGetColumnNorms_MPIAIJ,
3177                                        MatInvertBlockDiagonal_MPIAIJ,
3178                                        0,
3179                                        MatGetSubMatricesParallel_MPIAIJ,
3180                                 /*129*/0,
3181                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3182                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3183                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3184                                        0,
3185                                 /*134*/0,
3186                                        0,
3187                                        0,
3188                                        0,
3189                                        0,
3190                                 /*139*/0,
3191                                        0,
3192                                        0,
3193                                        MatFDColoringSetUp_MPIXAIJ
3194 };
3195 
3196 /* ----------------------------------------------------------------------------------------*/
3197 
3198 #undef __FUNCT__
3199 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3200 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3201 {
3202   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3203   PetscErrorCode ierr;
3204 
3205   PetscFunctionBegin;
3206   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3207   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3208   PetscFunctionReturn(0);
3209 }
3210 
3211 #undef __FUNCT__
3212 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3213 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3214 {
3215   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3216   PetscErrorCode ierr;
3217 
3218   PetscFunctionBegin;
3219   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3220   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3221   PetscFunctionReturn(0);
3222 }
3223 
3224 #undef __FUNCT__
3225 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
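/*
   On first use this creates the diagonal block b->A (with n local columns) and the
   off-diagonal block b->B (initially with all N global columns; its column space is
   compacted during assembly) as MATSEQAIJ matrices, then preallocates both.
*/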
3226 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3227 {
3228   Mat_MPIAIJ     *b;
3229   PetscErrorCode ierr;
3230 
3231   PetscFunctionBegin;
3232   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3233   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3234   b = (Mat_MPIAIJ*)B->data;
3235 
3236   if (!B->preallocated) {
3237     /* Explicitly create 2 MATSEQAIJ matrices. */
3238     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3239     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3240     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3241     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3242     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3243     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3244     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3245     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3246     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3247     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3248   }
3249 
3250   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3251   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3252   B->preallocated = PETSC_TRUE;
3253   PetscFunctionReturn(0);
3254 }
3255 
3256 #undef __FUNCT__
3257 #define __FUNCT__ "MatDuplicate_MPIAIJ"
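/*
   Duplicates the parallel layout, the column map and garray, the scatter context
   Mvctx, and the two sequential blocks A and B; whether numerical values are copied
   is controlled by cpvalues.
*/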
3258 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3259 {
3260   Mat            mat;
3261   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3262   PetscErrorCode ierr;
3263 
3264   PetscFunctionBegin;
3265   *newmat = 0;
3266   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3267   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3268   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3269   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3270   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3271   a       = (Mat_MPIAIJ*)mat->data;
3272 
3273   mat->factortype   = matin->factortype;
3274   mat->assembled    = PETSC_TRUE;
3275   mat->insertmode   = NOT_SET_VALUES;
3276   mat->preallocated = PETSC_TRUE;
3277 
3278   a->size         = oldmat->size;
3279   a->rank         = oldmat->rank;
3280   a->donotstash   = oldmat->donotstash;
3281   a->roworiented  = oldmat->roworiented;
3282   a->rowindices   = 0;
3283   a->rowvalues    = 0;
3284   a->getrowactive = PETSC_FALSE;
3285 
3286   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3287   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3288 
3289   if (oldmat->colmap) {
3290 #if defined(PETSC_USE_CTABLE)
3291     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3292 #else
3293     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3294     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3295     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3296 #endif
3297   } else a->colmap = 0;
3298   if (oldmat->garray) {
3299     PetscInt len;
3300     len  = oldmat->B->cmap->n;
3301     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3302     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3303     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3304   } else a->garray = 0;
3305 
3306   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3307   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3308   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3309   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3310   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3311   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3312   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3313   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3314   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3315   *newmat = mat;
3316   PetscFunctionReturn(0);
3317 }
3318 
3319 
3320 
3321 #undef __FUNCT__
3322 #define __FUNCT__ "MatLoad_MPIAIJ"
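/*
   Process 0 reads the binary file (header, row lengths, column indices, values) and
   ships each remote piece to its owning process with MPIULong_Send()/MPIULong_Recv();
   every process then inserts its own rows with MatSetValues_MPIAIJ() and assembles.
*/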
3323 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3324 {
3325   PetscScalar    *vals,*svals;
3326   MPI_Comm       comm;
3327   PetscErrorCode ierr;
3328   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3329   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3330   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3331   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3332   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3333   int            fd;
3334   PetscInt       bs = 1;
3335 
3336   PetscFunctionBegin;
3337   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3338   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3339   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3340   if (!rank) {
3341     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3342     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3343     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
3344   }
3345 
3346   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3347   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3348   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3349 
3350   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3351 
3352   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3353   M    = header[1]; N = header[2];
3354   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3355   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3356   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3357 
3358   /* If global sizes are set, check if they are consistent with that given in the file */
3359   if (sizesset) {
3360     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3361   }
3362   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3363   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3364 
3365   /* determine ownership of all (block) rows */
3366   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3367   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3368   else m = newMat->rmap->n; /* Set by user */
3369 
3370   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3371   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3372 
3373   /* First process needs enough room for process with most rows */
3374   if (!rank) {
3375     mmax = rowners[1];
3376     for (i=2; i<=size; i++) {
3377       mmax = PetscMax(mmax, rowners[i]);
3378     }
3379   } else mmax = -1;             /* unused, but compilers complain */
3380 
3381   rowners[0] = 0;
3382   for (i=2; i<=size; i++) {
3383     rowners[i] += rowners[i-1];
3384   }
3385   rstart = rowners[rank];
3386   rend   = rowners[rank+1];
3387 
3388   /* distribute row lengths to all processors */
3389   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3390   if (!rank) {
3391     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3392     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3393     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3394     for (j=0; j<m; j++) {
3395       procsnz[0] += ourlens[j];
3396     }
3397     for (i=1; i<size; i++) {
3398       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3399       /* calculate the number of nonzeros on each processor */
3400       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3401         procsnz[i] += rowlengths[j];
3402       }
3403       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3404     }
3405     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3406   } else {
3407     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3408   }
3409 
3410   if (!rank) {
3411     /* determine max buffer needed and allocate it */
3412     maxnz = 0;
3413     for (i=0; i<size; i++) {
3414       maxnz = PetscMax(maxnz,procsnz[i]);
3415     }
3416     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3417 
3418     /* read in my part of the matrix column indices  */
3419     nz   = procsnz[0];
3420     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3421     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3422 
3423     /* read in everyone else's and ship off */
3424     for (i=1; i<size; i++) {
3425       nz   = procsnz[i];
3426       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3427       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3428     }
3429     ierr = PetscFree(cols);CHKERRQ(ierr);
3430   } else {
3431     /* determine buffer space needed for message */
3432     nz = 0;
3433     for (i=0; i<m; i++) {
3434       nz += ourlens[i];
3435     }
3436     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3437 
3438     /* receive message of column indices*/
3439     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3440   }
3441 
3442   /* determine column ownership if matrix is not square */
3443   if (N != M) {
3444     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3445     else n = newMat->cmap->n;
3446     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3447     cstart = cend - n;
3448   } else {
3449     cstart = rstart;
3450     cend   = rend;
3451     n      = cend - cstart;
3452   }
3453 
3454   /* loop over local rows, determining number of off diagonal entries */
3455   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3456   jj   = 0;
3457   for (i=0; i<m; i++) {
3458     for (j=0; j<ourlens[i]; j++) {
3459       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3460       jj++;
3461     }
3462   }
3463 
3464   for (i=0; i<m; i++) {
3465     ourlens[i] -= offlens[i];
3466   }
3467   if (!sizesset) {
3468     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3469   }
3470 
3471   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3472 
3473   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3474 
3475   for (i=0; i<m; i++) {
3476     ourlens[i] += offlens[i];
3477   }
3478 
3479   if (!rank) {
3480     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3481 
3482     /* read in my part of the matrix numerical values  */
3483     nz   = procsnz[0];
3484     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3485 
3486     /* insert into matrix */
3487     jj      = rstart;
3488     smycols = mycols;
3489     svals   = vals;
3490     for (i=0; i<m; i++) {
3491       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3492       smycols += ourlens[i];
3493       svals   += ourlens[i];
3494       jj++;
3495     }
3496 
3497     /* read in other processors and ship out */
3498     for (i=1; i<size; i++) {
3499       nz   = procsnz[i];
3500       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3501       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3502     }
3503     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3504   } else {
3505     /* receive numeric values */
3506     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3507 
3508     /* receive message of values*/
3509     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3510 
3511     /* insert into matrix */
3512     jj      = rstart;
3513     smycols = mycols;
3514     svals   = vals;
3515     for (i=0; i<m; i++) {
3516       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3517       smycols += ourlens[i];
3518       svals   += ourlens[i];
3519       jj++;
3520     }
3521   }
3522   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3523   ierr = PetscFree(vals);CHKERRQ(ierr);
3524   ierr = PetscFree(mycols);CHKERRQ(ierr);
3525   ierr = PetscFree(rowners);CHKERRQ(ierr);
3526   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3527   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3528   PetscFunctionReturn(0);
3529 }
3530 
3531 #undef __FUNCT__
3532 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
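/*
   Gathers iscol onto every process with ISAllGather() and caches the result on the
   new matrix under the name "ISAllGather" so that a MAT_REUSE_MATRIX call can
   retrieve it.
*/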
3533 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3534 {
3535   PetscErrorCode ierr;
3536   IS             iscol_local;
3537   PetscInt       csize;
3538 
3539   PetscFunctionBegin;
3540   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3541   if (call == MAT_REUSE_MATRIX) {
3542     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3543     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3544   } else {
3545     PetscInt cbs;
3546     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3547     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3548     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3549   }
3550   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3551   if (call == MAT_INITIAL_MATRIX) {
3552     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3553     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3554   }
3555   PetscFunctionReturn(0);
3556 }
3557 
3558 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3559 #undef __FUNCT__
3560 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3561 /*
3562     Not great since it makes two copies of the submatrix: first a SeqAIJ
3563   locally, and then the end result by concatenating the local matrices.
3564   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3565 
3566   Note: This requires a sequential iscol with all indices.
3567 */
3568 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3569 {
3570   PetscErrorCode ierr;
3571   PetscMPIInt    rank,size;
3572   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3573   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3574   PetscBool      allcolumns, colflag;
3575   Mat            M,Mreuse;
3576   MatScalar      *vwork,*aa;
3577   MPI_Comm       comm;
3578   Mat_SeqAIJ     *aij;
3579 
3580   PetscFunctionBegin;
3581   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3582   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3583   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3584 
3585   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3586   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3587   if (colflag && ncol == mat->cmap->N) {
3588     allcolumns = PETSC_TRUE;
3589   } else {
3590     allcolumns = PETSC_FALSE;
3591   }
3592   if (call ==  MAT_REUSE_MATRIX) {
3593     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3594     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3595     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3596   } else {
3597     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3598   }
3599 
3600   /*
3601       m - number of local rows
3602       n - number of columns (same on all processors)
3603       rstart - first row in new global matrix generated
3604   */
3605   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3606   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3607   if (call == MAT_INITIAL_MATRIX) {
3608     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3609     ii  = aij->i;
3610     jj  = aij->j;
3611 
3612     /*
3613         Determine the number of non-zeros in the diagonal and off-diagonal
3614         portions of the matrix in order to do correct preallocation
3615     */
3616 
3617     /* first get start and end of "diagonal" columns */
3618     if (csize == PETSC_DECIDE) {
3619       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3620       if (mglobal == n) { /* square matrix */
3621         nlocal = m;
3622       } else {
3623         nlocal = n/size + ((n % size) > rank);
3624       }
3625     } else {
3626       nlocal = csize;
3627     }
3628     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3629     rstart = rend - nlocal;
3630     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3631 
3632     /* next, compute all the lengths */
3633     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3634     olens = dlens + m;
3635     for (i=0; i<m; i++) {
3636       jend = ii[i+1] - ii[i];
3637       olen = 0;
3638       dlen = 0;
3639       for (j=0; j<jend; j++) {
3640         if (*jj < rstart || *jj >= rend) olen++;
3641         else dlen++;
3642         jj++;
3643       }
3644       olens[i] = olen;
3645       dlens[i] = dlen;
3646     }
3647     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3648     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3649     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3650     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3651     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3652     ierr = PetscFree(dlens);CHKERRQ(ierr);
3653   } else {
3654     PetscInt ml,nl;
3655 
3656     M    = *newmat;
3657     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3658     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3659     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3660     /*
3661          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3662        rather than the slower MatSetValues().
3663     */
3664     M->was_assembled = PETSC_TRUE;
3665     M->assembled     = PETSC_FALSE;
3666   }
3667   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3668   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3669   ii   = aij->i;
3670   jj   = aij->j;
3671   aa   = aij->a;
3672   for (i=0; i<m; i++) {
3673     row   = rstart + i;
3674     nz    = ii[i+1] - ii[i];
3675     cwork = jj;     jj += nz;
3676     vwork = aa;     aa += nz;
3677     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3678   }
3679 
3680   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3681   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3682   *newmat = M;
3683 
3684   /* save submatrix used in processor for next request */
3685   if (call ==  MAT_INITIAL_MATRIX) {
3686     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3687     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3688   }
3689   PetscFunctionReturn(0);
3690 }
3691 
3692 #undef __FUNCT__
3693 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
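/*
   For each local row of the CSR input, counts the entries that fall inside the local
   column range [cstart,cend) (d_nnz) versus outside it (o_nnz), preallocates
   accordingly, then inserts the rows (zeros if v is NULL) and assembles the matrix.
*/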
3694 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3695 {
3696   PetscInt       m,cstart, cend,j,nnz,i,d;
3697   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3698   const PetscInt *JJ;
3699   PetscScalar    *values;
3700   PetscErrorCode ierr;
3701 
3702   PetscFunctionBegin;
3703   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3704 
3705   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3706   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3707   m      = B->rmap->n;
3708   cstart = B->cmap->rstart;
3709   cend   = B->cmap->rend;
3710   rstart = B->rmap->rstart;
3711 
3712   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3713 
3714 #if defined(PETSC_USE_DEBUG)
3715   for (i=0; i<m; i++) {
3716     nnz = Ii[i+1]- Ii[i];
3717     JJ  = J + Ii[i];
3718     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3719     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3720     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3721   }
3722 #endif
3723 
3724   for (i=0; i<m; i++) {
3725     nnz     = Ii[i+1]- Ii[i];
3726     JJ      = J + Ii[i];
3727     nnz_max = PetscMax(nnz_max,nnz);
3728     d       = 0;
3729     for (j=0; j<nnz; j++) {
3730       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3731     }
3732     d_nnz[i] = d;
3733     o_nnz[i] = nnz - d;
3734   }
3735   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3736   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3737 
3738   if (v) values = (PetscScalar*)v;
3739   else {
3740     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3741   }
3742 
3743   for (i=0; i<m; i++) {
3744     ii   = i + rstart;
3745     nnz  = Ii[i+1]- Ii[i];
3746     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3747   }
3748   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3749   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3750 
3751   if (!v) {
3752     ierr = PetscFree(values);CHKERRQ(ierr);
3753   }
3754   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3755   PetscFunctionReturn(0);
3756 }
3757 
3758 #undef __FUNCT__
3759 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3760 /*@
3761    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3762    (the default parallel PETSc format).
3763 
3764    Collective on MPI_Comm
3765 
3766    Input Parameters:
3767 +  B - the matrix
3768 .  i - the indices into j for the start of each local row (starts with zero)
3769 .  j - the column indices for each local row (starts with zero)
3770 -  v - optional values in the matrix
3771 
3772    Level: developer
3773 
3774    Notes:
3775        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3776      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3777      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3778 
3779        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3780 
3781        The format used for the sparse matrix input is equivalent to a
3782     row-major ordering, i.e. for the following matrix, the input data expected is
3783     as shown:
3784 
3785         1 0 0
3786         2 0 3     P0
3787        -------
3788         4 5 6     P1
3789 
3790      Process0 [P0]: rows_owned=[0,1]
3791         i =  {0,1,3}  [size = nrow+1  = 2+1]
3792         j =  {0,0,2}  [size = nz = 6]
3793         v =  {1,2,3}  [size = nz = 6]
3794 
3795      Process1 [P1]: rows_owned=[2]
3796         i =  {0,3}    [size = nrow+1  = 1+1]
3797         j =  {0,1,2}  [size = nz = 6]
3798         v =  {4,5,6}  [size = nz = 6]
3799 
3800 .keywords: matrix, aij, compressed row, sparse, parallel
3801 
3802 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3803           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3804 @*/
3805 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3806 {
3807   PetscErrorCode ierr;
3808 
3809   PetscFunctionBegin;
3810   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3811   PetscFunctionReturn(0);
3812 }
3813 
3814 #undef __FUNCT__
3815 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3816 /*@C
3817    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3818    (the default parallel PETSc format).  For good matrix assembly performance
3819    the user should preallocate the matrix storage by setting the parameters
3820    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3821    performance can be increased by more than a factor of 50.
3822 
3823    Collective on MPI_Comm
3824 
3825    Input Parameters:
3826 +  A - the matrix
3827 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3828            (same value is used for all local rows)
3829 .  d_nnz - array containing the number of nonzeros in the various rows of the
3830            DIAGONAL portion of the local submatrix (possibly different for each row)
3831            or NULL, if d_nz is used to specify the nonzero structure.
3832            The size of this array is equal to the number of local rows, i.e 'm'.
3833            For matrices that will be factored, you must leave room for (and set)
3834            the diagonal entry even if it is zero.
3835 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3836            submatrix (same value is used for all local rows).
3837 -  o_nnz - array containing the number of nonzeros in the various rows of the
3838            OFF-DIAGONAL portion of the local submatrix (possibly different for
3839            each row) or NULL, if o_nz is used to specify the nonzero
3840            structure. The size of this array is equal to the number
3841            of local rows, i.e 'm'.
3842 
3843    If the *_nnz parameter is given then the *_nz parameter is ignored
3844 
3845    The AIJ format (also called the Yale sparse matrix format or
3846    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3847    storage.  The stored row and column indices begin with zero.
3848    See Users-Manual: ch_mat for details.
3849 
   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3853 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3863 
   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3865 
   You can call MatGetInfo() to get information on how effective the preallocation was,
   for example via the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.
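
   For instance, a minimal sketch of querying the local preallocation
   statistics with MatGetInfo() (error checking omitted):

.vb
   MatInfo info;

   MatGetInfo(A,MAT_LOCAL,&info);
   PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_allocated %g nz_used %g nz_unneeded %g\n",
               info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);
.ve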
3870 
3871    Example usage:
3872 
   Consider the following 8x8 matrix with 34 nonzero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:
3877 
3878 .vb
3879             1  2  0  |  0  3  0  |  0  4
3880     Proc0   0  5  6  |  7  0  0  |  8  0
3881             9  0 10  | 11  0  0  | 12  0
3882     -------------------------------------
3883            13  0 14  | 15 16 17  |  0  0
3884     Proc1   0 18  0  | 19 20 21  |  0  0
3885             0  0  0  | 22 23  0  | 24  0
3886     -------------------------------------
3887     Proc2  25 26 27  |  0  0 28  | 29  0
3888            30  0  0  | 31 32 33  |  0 34
3889 .ve
3890 
3891    This can be represented as a collection of submatrices as:
3892 
3893 .vb
3894       A B C
3895       D E F
3896       G H I
3897 .ve
3898 
3899    Where the submatrices A,B,C are owned by proc0, D,E,F are
3900    owned by proc1, G,H,I are owned by proc2.
3901 
3902    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3903    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3904    The 'M','N' parameters are 8,8, and have the same values on all procs.
3905 
3906    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3907    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3908    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3912 
   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
3919 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
3923 .ve
   We are allocating m*(d_nz+o_nz) storage locations on every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.
3928 
   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3931    In the above case the values for d_nnz,o_nnz are:
3932 .vb
3933      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3934      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3935      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3936 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
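
   For instance, on proc0 of the example above the preallocation call might be
   (a sketch, assuming B was created with 3 local rows and type MATMPIAIJ;
   error checking omitted):

.vb
   PetscInt d_nnz[] = {2,2,2};
   PetscInt o_nnz[] = {2,2,2};

   MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve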
3939 
3940    Level: intermediate
3941 
3942 .keywords: matrix, aij, compressed row, sparse, parallel
3943 
3944 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3945           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3946 @*/
3947 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3948 {
3949   PetscErrorCode ierr;
3950 
3951   PetscFunctionBegin;
3952   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3953   PetscValidType(B,1);
3954   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3955   PetscFunctionReturn(0);
3956 }
3957 
3958 #undef __FUNCT__
3959 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3960 /*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
         the local rows in standard CSR format.
3963 
3964    Collective on MPI_Comm
3965 
3966    Input Parameters:
3967 +  comm - MPI communicator
3968 .  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3974 .   i - row indices
3975 .   j - column indices
3976 -   a - matrix values
3977 
3978    Output Parameter:
3979 .   mat - the matrix
3980 
3981    Level: intermediate
3982 
3983    Notes:
3984        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3985      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3986      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3987 
       The i and j indices are 0-based, and the entries of i are offsets into the local j array.
3989 
       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix the expected input
    data is as shown:
3993 
3994         1 0 0
3995         2 0 3     P0
3996        -------
3997         4 5 6     P1
3998 
     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
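
     For instance, the call on process 0 of the example above might be
   (a minimal sketch; comm is assumed to span the two processes and
   error checking is omitted):

.vb
   PetscInt    i[] = {0,1,3};
   PetscInt    j[] = {0,0,2};
   PetscScalar v[] = {1,2,3};
   Mat         A;

   MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve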
4008 
4009 .keywords: matrix, aij, compressed row, sparse, parallel
4010 
4011 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4012           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4013 @*/
4014 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4015 {
4016   PetscErrorCode ierr;
4017 
4018   PetscFunctionBegin;
4019   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4020   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4021   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4022   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4023   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4024   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4025   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4026   PetscFunctionReturn(0);
4027 }
4028 
4029 #undef __FUNCT__
4030 #define __FUNCT__ "MatCreateAIJ"
4031 /*@C
4032    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4033    (the default parallel PETSc format).  For good matrix assembly performance
4034    the user should preallocate the matrix storage by setting the parameters
4035    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4036    performance can be increased by more than a factor of 50.
4037 
4038    Collective on MPI_Comm
4039 
4040    Input Parameters:
4041 +  comm - MPI communicator
4042 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4043            This value should be the same as the local size used in creating the
4044            y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4050 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4051            (same value is used for all local rows)
4052 .  d_nnz - array containing the number of nonzeros in the various rows of the
4053            DIAGONAL portion of the local submatrix (possibly different for each row)
4054            or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
4056 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4057            submatrix (same value is used for all local rows).
4058 -  o_nnz - array containing the number of nonzeros in the various rows of the
4059            OFF-DIAGONAL portion of the local submatrix (possibly different for
4060            each row) or NULL, if o_nz is used to specify the nonzero
4061            structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.
4063 
4064    Output Parameter:
4065 .  A - the matrix
4066 
   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4070 
4071    Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.
4073 
4074    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4075    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4076    storage requirements for this matrix.
4077 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.
4081 
4082    The user MUST specify either the local or global matrix dimensions
4083    (possibly both).
4084 
   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.
4090 
   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
4094 
   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix formed by the rows and columns m,n owned by
   the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.
4102 
4103    For a square global matrix we define each processor's diagonal portion
4104    to be its local rows and the corresponding columns (a square submatrix);
4105    each processor's off-diagonal portion encompasses the remainder of the
4106    local matrix (a rectangular submatrix).
4107 
   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4109 
4110    When calling this routine with a single process communicator, a matrix of
4111    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4112    type of communicator, use the construction mechanism:
4113      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4114 
4115    By default, this format uses inodes (identical nodes) when possible.
4116    We search for consecutive rows with the same nonzero structure, thereby
4117    reusing matrix information to achieve increased efficiency.
4118 
4119    Options Database Keys:
4120 +  -mat_no_inode  - Do not use inodes
4121 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4122 -  -mat_aij_oneindex - Internally use indexing starting at 1
4123         rather than 0.  Note that when calling MatSetValues(),
4124         the user still MUST index entries starting at 0!
4125 
4126 
4127    Example usage:
4128 
   Consider the following 8x8 matrix with 34 nonzero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:
4133 
4134 .vb
4135             1  2  0  |  0  3  0  |  0  4
4136     Proc0   0  5  6  |  7  0  0  |  8  0
4137             9  0 10  | 11  0  0  | 12  0
4138     -------------------------------------
4139            13  0 14  | 15 16 17  |  0  0
4140     Proc1   0 18  0  | 19 20 21  |  0  0
4141             0  0  0  | 22 23  0  | 24  0
4142     -------------------------------------
4143     Proc2  25 26 27  |  0  0 28  | 29  0
4144            30  0  0  | 31 32 33  |  0 34
4145 .ve
4146 
4147    This can be represented as a collection of submatrices as:
4148 
4149 .vb
4150       A B C
4151       D E F
4152       G H I
4153 .ve
4154 
4155    Where the submatrices A,B,C are owned by proc0, D,E,F are
4156    owned by proc1, G,H,I are owned by proc2.
4157 
4158    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4159    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4160    The 'M','N' parameters are 8,8, and have the same values on all procs.
4161 
4162    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4163    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4164    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4168 
   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
4175 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
4179 .ve
   We are allocating m*(d_nz+o_nz) storage locations on every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.
4184 
   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4187    In the above case the values for d_nnz,o_nnz are:
4188 .vb
4189      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4190      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4191      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4192 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
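
   For instance, the matrix of the example above could be created on proc0 with
   (a sketch; comm is assumed to span the three processes, each process passes
   its own d_nnz and o_nnz, and error checking is omitted):

.vb
   PetscInt d_nnz[] = {2,2,2};
   PetscInt o_nnz[] = {2,2,2};
   Mat      A;

   MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve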
4195 
4196    Level: intermediate
4197 
4198 .keywords: matrix, aij, compressed row, sparse, parallel
4199 
4200 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4201           MPIAIJ, MatCreateMPIAIJWithArrays()
4202 @*/
4203 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4204 {
4205   PetscErrorCode ierr;
4206   PetscMPIInt    size;
4207 
4208   PetscFunctionBegin;
4209   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4210   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4211   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4212   if (size > 1) {
4213     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4214     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4215   } else {
4216     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4217     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4218   }
4219   PetscFunctionReturn(0);
4220 }
4221 
4222 #undef __FUNCT__
4223 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4224 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4225 {
4226   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4227 
4228   PetscFunctionBegin;
4229   *Ad     = a->A;
4230   *Ao     = a->B;
4231   *colmap = a->garray;
4232   PetscFunctionReturn(0);
4233 }
4234 
4235 #undef __FUNCT__
4236 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4237 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4238 {
4239   PetscErrorCode ierr;
4240   PetscInt       i;
4241   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4242 
4243   PetscFunctionBegin;
4244   if (coloring->ctype == IS_COLORING_GLOBAL) {
4245     ISColoringValue *allcolors,*colors;
4246     ISColoring      ocoloring;
4247 
4248     /* set coloring for diagonal portion */
4249     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4250 
4251     /* set coloring for off-diagonal portion */
4252     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4253     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4254     for (i=0; i<a->B->cmap->n; i++) {
4255       colors[i] = allcolors[a->garray[i]];
4256     }
4257     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4258     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4259     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4260     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4261   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4262     ISColoringValue *colors;
4263     PetscInt        *larray;
4264     ISColoring      ocoloring;
4265 
4266     /* set coloring for diagonal portion */
4267     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4268     for (i=0; i<a->A->cmap->n; i++) {
4269       larray[i] = i + A->cmap->rstart;
4270     }
4271     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4272     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4273     for (i=0; i<a->A->cmap->n; i++) {
4274       colors[i] = coloring->colors[larray[i]];
4275     }
4276     ierr = PetscFree(larray);CHKERRQ(ierr);
4277     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4278     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4279     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4280 
4281     /* set coloring for off-diagonal portion */
4282     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4283     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4284     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4285     for (i=0; i<a->B->cmap->n; i++) {
4286       colors[i] = coloring->colors[larray[i]];
4287     }
4288     ierr = PetscFree(larray);CHKERRQ(ierr);
4289     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4290     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4291     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4292   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4293   PetscFunctionReturn(0);
4294 }
4295 
4296 #undef __FUNCT__
4297 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4298 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4299 {
4300   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4301   PetscErrorCode ierr;
4302 
4303   PetscFunctionBegin;
4304   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4305   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4306   PetscFunctionReturn(0);
4307 }
4308 
4309 #undef __FUNCT__
4310 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4311 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4312 {
4313   PetscErrorCode ierr;
4314   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4315   PetscInt       *indx;
4316 
4317   PetscFunctionBegin;
4318   /* This routine will ONLY return MPIAIJ type matrix */
4319   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4320   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4321   if (n == PETSC_DECIDE) {
4322     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4323   }
4324   /* Check sum(n) = N */
4325   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4326   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4327 
4328   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4329   rstart -= m;
4330 
4331   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4332   for (i=0; i<m; i++) {
4333     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4334     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4335     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4336   }
4337 
4338   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4339   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4340   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4341   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4342   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4343   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4344   PetscFunctionReturn(0);
4345 }
4346 
4347 #undef __FUNCT__
4348 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4349 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4350 {
4351   PetscErrorCode ierr;
4352   PetscInt       m,N,i,rstart,nnz,Ii;
4353   PetscInt       *indx;
4354   PetscScalar    *values;
4355 
4356   PetscFunctionBegin;
4357   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4358   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4359   for (i=0; i<m; i++) {
4360     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4361     Ii   = i + rstart;
4362     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4363     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4364   }
4365   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4366   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4367   PetscFunctionReturn(0);
4368 }
4369 
4370 #undef __FUNCT__
4371 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4372 /*@
4373       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4374                  matrices from each processor
4375 
4376     Collective on MPI_Comm
4377 
4378    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process)
4381 .    n - number of local columns (or PETSC_DECIDE)
4382 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4383 
4384    Output Parameter:
4385 .    outmat - the parallel matrix generated
4386 
4387     Level: advanced
4388 
4389    Notes: The number of columns of the matrix in EACH processor MUST be the same.
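
   A typical call might be (a minimal sketch; error checking omitted):

.vb
   Mat outmat;

   MatCreateMPIAIJConcatenateSeqAIJ(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
.ve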
4390 
4391 @*/
4392 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4393 {
4394   PetscErrorCode ierr;
4395   PetscMPIInt    size;
4396 
4397   PetscFunctionBegin;
4398   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4399   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4400   if (size == 1) {
4401     if (scall == MAT_INITIAL_MATRIX) {
4402       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4403     } else {
4404       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4405     }
4406   } else {
4407     if (scall == MAT_INITIAL_MATRIX) {
4408       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4409     }
4410     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4411   }
4412   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4413   PetscFunctionReturn(0);
4414 }
4415 
4416 #undef __FUNCT__
4417 #define __FUNCT__ "MatFileSplit"
4418 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4419 {
4420   PetscErrorCode    ierr;
4421   PetscMPIInt       rank;
4422   PetscInt          m,N,i,rstart,nnz;
4423   size_t            len;
4424   const PetscInt    *indx;
4425   PetscViewer       out;
4426   char              *name;
4427   Mat               B;
4428   const PetscScalar *values;
4429 
4430   PetscFunctionBegin;
4431   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4432   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4433   /* Should this be the type of the diagonal block of A? */
4434   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4435   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4436   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4437   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4438   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4439   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4440   for (i=0; i<m; i++) {
4441     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4442     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4443     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4444   }
4445   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4446   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4447 
4448   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4449   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4450   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4451   sprintf(name,"%s.%d",outfile,rank);
4452   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4453   ierr = PetscFree(name);CHKERRQ(ierr);
4454   ierr = MatView(B,out);CHKERRQ(ierr);
4455   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4456   ierr = MatDestroy(&B);CHKERRQ(ierr);
4457   PetscFunctionReturn(0);
4458 }
4459 
4460 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4461 #undef __FUNCT__
4462 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4463 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4464 {
4465   PetscErrorCode      ierr;
4466   Mat_Merge_SeqsToMPI *merge;
4467   PetscContainer      container;
4468 
4469   PetscFunctionBegin;
4470   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4471   if (container) {
4472     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4473     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4474     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4475     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4476     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4477     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4478     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4479     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4480     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4481     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4482     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4483     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4484     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4485     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4486     ierr = PetscFree(merge);CHKERRQ(ierr);
4487     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4488   }
4489   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4490   PetscFunctionReturn(0);
4491 }
4492 
4493 #include <../src/mat/utils/freespace.h>
4494 #include <petscbt.h>
4495 
4496 #undef __FUNCT__
4497 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4498 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4499 {
4500   PetscErrorCode      ierr;
4501   MPI_Comm            comm;
4502   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4503   PetscMPIInt         size,rank,taga,*len_s;
4504   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4505   PetscInt            proc,m;
4506   PetscInt            **buf_ri,**buf_rj;
4507   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4508   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4509   MPI_Request         *s_waits,*r_waits;
4510   MPI_Status          *status;
4511   MatScalar           *aa=a->a;
4512   MatScalar           **abuf_r,*ba_i;
4513   Mat_Merge_SeqsToMPI *merge;
4514   PetscContainer      container;
4515 
4516   PetscFunctionBegin;
4517   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4518   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4519 
4520   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4521   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4522 
4523   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4524   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4525 
4526   bi     = merge->bi;
4527   bj     = merge->bj;
4528   buf_ri = merge->buf_ri;
4529   buf_rj = merge->buf_rj;
4530 
4531   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4532   owners = merge->rowmap->range;
4533   len_s  = merge->len_s;
4534 
4535   /* send and recv matrix values */
4536   /*-----------------------------*/
4537   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4538   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4539 
4540   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4541   for (proc=0,k=0; proc<size; proc++) {
4542     if (!len_s[proc]) continue;
4543     i    = owners[proc];
4544     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4545     k++;
4546   }
4547 
4548   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4549   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4550   ierr = PetscFree(status);CHKERRQ(ierr);
4551 
4552   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4553   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4554 
4555   /* insert mat values of mpimat */
4556   /*----------------------------*/
4557   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4558   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4559 
4560   for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4565   }
4566 
4567   /* set values of ba */
4568   m = merge->rowmap->n;
4569   for (i=0; i<m; i++) {
4570     arow = owners[rank] + i;
4571     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4572     bnzi = bi[i+1] - bi[i];
4573     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4574 
4575     /* add local non-zero vals of this proc's seqmat into ba */
4576     anzi   = ai[arow+1] - ai[arow];
4577     aj     = a->j + ai[arow];
4578     aa     = a->a + ai[arow];
4579     nextaj = 0;
4580     for (j=0; nextaj<anzi; j++) {
4581       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4582         ba_i[j] += aa[nextaj++];
4583       }
4584     }
4585 
4586     /* add received vals into ba */
4587     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4588       /* i-th row */
4589       if (i == *nextrow[k]) {
4590         anzi   = *(nextai[k]+1) - *nextai[k];
4591         aj     = buf_rj[k] + *(nextai[k]);
4592         aa     = abuf_r[k] + *(nextai[k]);
4593         nextaj = 0;
4594         for (j=0; nextaj<anzi; j++) {
4595           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4596             ba_i[j] += aa[nextaj++];
4597           }
4598         }
4599         nextrow[k]++; nextai[k]++;
4600       }
4601     }
4602     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4603   }
4604   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4605   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4606 
4607   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4608   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4609   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4610   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4611   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4612   PetscFunctionReturn(0);
4613 }
4614 
4615 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4616 
4617 #undef __FUNCT__
4618 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4619 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4620 {
4621   PetscErrorCode      ierr;
4622   Mat                 B_mpi;
4623   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4624   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4625   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4626   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4627   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4628   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4629   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4630   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4631   MPI_Status          *status;
4632   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4633   PetscBT             lnkbt;
4634   Mat_Merge_SeqsToMPI *merge;
4635   PetscContainer      container;
4636 
4637   PetscFunctionBegin;
4638   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4639 
4640   /* make sure it is a PETSc comm */
4641   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4642   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4643   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4644 
4645   ierr = PetscNew(&merge);CHKERRQ(ierr);
4646   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4647 
4648   /* determine row ownership */
4649   /*---------------------------------------------------------*/
4650   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4651   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4652   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4653   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4654   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4655   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4656   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4657 
4658   m      = merge->rowmap->n;
4659   owners = merge->rowmap->range;
4660 
4661   /* determine the number of messages to send, their lengths */
4662   /*---------------------------------------------------------*/
4663   len_s = merge->len_s;
4664 
4665   len          = 0; /* length of buf_si[] */
4666   merge->nsend = 0;
4667   for (proc=0; proc<size; proc++) {
4668     len_si[proc] = 0;
4669     if (proc == rank) {
4670       len_s[proc] = 0;
4671     } else {
4672       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4673       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4674     }
4675     if (len_s[proc]) {
4676       merge->nsend++;
4677       nrows = 0;
4678       for (i=owners[proc]; i<owners[proc+1]; i++) {
4679         if (ai[i+1] > ai[i]) nrows++;
4680       }
4681       len_si[proc] = 2*(nrows+1);
4682       len         += len_si[proc];
4683     }
4684   }
4685 
4686   /* determine the number and length of messages to receive for ij-structure */
4687   /*-------------------------------------------------------------------------*/
4688   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4689   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4690 
4691   /* post the Irecv of j-structure */
4692   /*-------------------------------*/
4693   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4694   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4695 
4696   /* post the Isend of j-structure */
4697   /*--------------------------------*/
4698   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4699 
4700   for (proc=0, k=0; proc<size; proc++) {
4701     if (!len_s[proc]) continue;
4702     i    = owners[proc];
4703     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4704     k++;
4705   }
4706 
4707   /* receives and sends of j-structure are complete */
4708   /*------------------------------------------------*/
4709   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4710   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4711 
4712   /* send and recv i-structure */
4713   /*---------------------------*/
4714   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4715   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4716 
4717   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4718   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4719   for (proc=0,k=0; proc<size; proc++) {
4720     if (!len_s[proc]) continue;
4721     /* form outgoing message for i-structure:
4722          buf_si[0]:                 nrows to be sent
4723                [1:nrows]:           row index (global)
4724                [nrows+1:2*nrows+1]: i-structure index
4725     */
4726     /*-------------------------------------------*/
4727     nrows       = len_si[proc]/2 - 1;
4728     buf_si_i    = buf_si + nrows+1;
4729     buf_si[0]   = nrows;
4730     buf_si_i[0] = 0;
4731     nrows       = 0;
4732     for (i=owners[proc]; i<owners[proc+1]; i++) {
4733       anzi = ai[i+1] - ai[i];
4734       if (anzi) {
4735         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4736         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4737         nrows++;
4738       }
4739     }
4740     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4741     k++;
4742     buf_si += len_si[proc];
4743   }
4744 
4745   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4746   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4747 
4748   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4749   for (i=0; i<merge->nrecv; i++) {
4750     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4751   }
4752 
4753   ierr = PetscFree(len_si);CHKERRQ(ierr);
4754   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4755   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4756   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4757   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4758   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4759   ierr = PetscFree(status);CHKERRQ(ierr);
4760 
4761   /* compute a local seq matrix in each processor */
4762   /*----------------------------------------------*/
4763   /* allocate bi array and free space for accumulating nonzero column info */
4764   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4765   bi[0] = 0;
4766 
4767   /* create and initialize a linked list */
4768   nlnk = N+1;
4769   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4770 
4771   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4772   len  = ai[owners[rank+1]] - ai[owners[rank]];
4773   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4774 
4775   current_space = free_space;
4776 
4777   /* determine symbolic info for each local row */
4778   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4779 
4780   for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4785   }
4786 
4787   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4788   len  = 0;
4789   for (i=0; i<m; i++) {
4790     bnzi = 0;
4791     /* add local non-zero cols of this proc's seqmat into lnk */
4792     arow  = owners[rank] + i;
4793     anzi  = ai[arow+1] - ai[arow];
4794     aj    = a->j + ai[arow];
4795     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4796     bnzi += nlnk;
4797     /* add received col data into lnk */
4798     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4799       if (i == *nextrow[k]) { /* i-th row */
4800         anzi  = *(nextai[k]+1) - *nextai[k];
4801         aj    = buf_rj[k] + *nextai[k];
4802         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4803         bnzi += nlnk;
4804         nextrow[k]++; nextai[k]++;
4805       }
4806     }
4807     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4808 
4809     /* if free space is not available, make more free space */
4810     if (current_space->local_remaining<bnzi) {
4811       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4812       nspacedouble++;
4813     }
4814     /* copy data into free space, then initialize lnk */
4815     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4816     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4817 
4818     current_space->array           += bnzi;
4819     current_space->local_used      += bnzi;
4820     current_space->local_remaining -= bnzi;
4821 
4822     bi[i+1] = bi[i] + bnzi;
4823   }
4824 
4825   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4826 
4827   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4828   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4829   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4830 
4831   /* create symbolic parallel matrix B_mpi */
4832   /*---------------------------------------*/
4833   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4834   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4835   if (n==PETSC_DECIDE) {
4836     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4837   } else {
4838     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4839   }
4840   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4841   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4842   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4843   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4844   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4845 
4846   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4847   B_mpi->assembled    = PETSC_FALSE;
4848   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4849   merge->bi           = bi;
4850   merge->bj           = bj;
4851   merge->buf_ri       = buf_ri;
4852   merge->buf_rj       = buf_rj;
4853   merge->coi          = NULL;
4854   merge->coj          = NULL;
4855   merge->owners_co    = NULL;
4856 
4857   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4858 
4859   /* attach the supporting struct to B_mpi for reuse */
4860   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4861   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4862   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4863   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4864   *mpimat = B_mpi;
4865 
4866   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4867   PetscFunctionReturn(0);
4868 }
4869 
4870 #undef __FUNCT__
4871 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4872 /*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
                 matrices from each processor
4875 
4876     Collective on MPI_Comm
4877 
4878    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
4881 .    m - number of local rows (or PETSC_DECIDE)
4882 .    n - number of local columns (or PETSC_DECIDE)
4883 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4884 
4885    Output Parameter:
4886 .    mpimat - the parallel matrix generated
4887 
4888     Level: advanced
4889 
4890    Notes:
4891      The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
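
     A typical call sequence might be (a sketch, assuming each process holds a
     sequential matrix seqmat of identical dimensions; error checking omitted):

.vb
   Mat mpimat;

   MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
   /* ... change the numerical values of seqmat, keeping its nonzero pattern ... */
   MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve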
4894 @*/
4895 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4896 {
4897   PetscErrorCode ierr;
4898   PetscMPIInt    size;
4899 
4900   PetscFunctionBegin;
4901   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4902   if (size == 1) {
4903     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4904     if (scall == MAT_INITIAL_MATRIX) {
4905       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4906     } else {
4907       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4908     }
4909     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4910     PetscFunctionReturn(0);
4911   }
4912   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4913   if (scall == MAT_INITIAL_MATRIX) {
4914     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4915   }
4916   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4917   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4918   PetscFunctionReturn(0);
4919 }
4920 
4921 #undef __FUNCT__
4922 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4923 /*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()
4927 
4928     Not Collective
4929 
4930    Input Parameters:
4931 +    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4933 
4934    Output Parameter:
4935 .    A_loc - the local sequential matrix generated
4936 
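   A typical call sequence might be (a minimal sketch; error checking omitted):

.vb
   Mat A_loc;

   MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
   /* ... use A_loc ... */
   MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);  /* refresh the values after A changes */
   MatDestroy(&A_loc);
.ve
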
4937     Level: developer
4938 
.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4940 
4941 @*/
4942 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4943 {
4944   PetscErrorCode ierr;
4945   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4946   Mat_SeqAIJ     *mat,*a,*b;
4947   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4948   MatScalar      *aa,*ba,*cam;
4949   PetscScalar    *ca;
4950   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4951   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4952   PetscBool      match;
4953 
4954   PetscFunctionBegin;
4955   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4956   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4957   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4958   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4959   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4960   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4961   aa = a->a; ba = b->a;
4962   if (scall == MAT_INITIAL_MATRIX) {
4963     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4964     ci[0] = 0;
4965     for (i=0; i<am; i++) {
4966       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4967     }
4968     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4969     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4970     k    = 0;
4971     for (i=0; i<am; i++) {
4972       ncols_o = bi[i+1] - bi[i];
4973       ncols_d = ai[i+1] - ai[i];
4974       /* off-diagonal portion of A */
4975       for (jo=0; jo<ncols_o; jo++) {
4976         col = cmap[*bj];
4977         if (col >= cstart) break;
4978         cj[k]   = col; bj++;
4979         ca[k++] = *ba++;
4980       }
4981       /* diagonal portion of A */
4982       for (j=0; j<ncols_d; j++) {
4983         cj[k]   = cstart + *aj++;
4984         ca[k++] = *aa++;
4985       }
4986       /* off-diagonal portion of A */
4987       for (j=jo; j<ncols_o; j++) {
4988         cj[k]   = cmap[*bj++];
4989         ca[k++] = *ba++;
4990       }
4991     }
4992     /* put together the new matrix */
4993     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4994     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4995     /* Since these are PETSc arrays, change flags to free them as necessary. */
4996     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4997     mat->free_a  = PETSC_TRUE;
4998     mat->free_ij = PETSC_TRUE;
4999     mat->nonew   = 0;
5000   } else if (scall == MAT_REUSE_MATRIX) {
5001     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5002     ci = mat->i; cj = mat->j; cam = mat->a;
5003     for (i=0; i<am; i++) {
5004       /* off-diagonal portion of A */
5005       ncols_o = bi[i+1] - bi[i];
5006       for (jo=0; jo<ncols_o; jo++) {
5007         col = cmap[*bj];
5008         if (col >= cstart) break;
5009         *cam++ = *ba++; bj++;
5010       }
5011       /* diagonal portion of A */
5012       ncols_d = ai[i+1] - ai[i];
5013       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5014       /* off-diagonal portion of A */
5015       for (j=jo; j<ncols_o; j++) {
5016         *cam++ = *ba++; bj++;
5017       }
5018     }
5019   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5020   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5021   PetscFunctionReturn(0);
5022 }
5023 
5024 #undef __FUNCT__
5025 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5026 /*@C
5027      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5028 
5029     Not Collective
5030 
5031    Input Parameters:
5032 +    A - the matrix
5033 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5034 -    row, col - index sets of rows and columns to extract (or NULL)
5035 
5036    Output Parameter:
5037 .    A_loc - the local sequential matrix generated
5038 
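   A typical call might be (a minimal sketch; passing NULL extracts all local
   rows and all nonzero columns; error checking omitted):

.vb
   Mat A_loc;

   MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
.ve
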
5039     Level: developer
5040 
5041 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5042 
5043 @*/
5044 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5045 {
5046   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5047   PetscErrorCode ierr;
5048   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5049   IS             isrowa,iscola;
5050   Mat            *aloc;
5051   PetscBool      match;
5052 
5053   PetscFunctionBegin;
5054   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5055   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5056   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5057   if (!row) {
5058     start = A->rmap->rstart; end = A->rmap->rend;
5059     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5060   } else {
5061     isrowa = *row;
5062   }
5063   if (!col) {
5064     start = A->cmap->rstart;
5065     cmap  = a->garray;
5066     nzA   = a->A->cmap->n;
5067     nzB   = a->B->cmap->n;
5068     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5069     ncols = 0;
5070     for (i=0; i<nzB; i++) {
5071       if (cmap[i] < start) idx[ncols++] = cmap[i];
5072       else break;
5073     }
5074     imark = i;
5075     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5076     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5077     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5078   } else {
5079     iscola = *col;
5080   }
5081   if (scall != MAT_INITIAL_MATRIX) {
5082     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5083     aloc[0] = *A_loc;
5084   }
5085   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5086   *A_loc = aloc[0];
5087   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5088   if (!row) {
5089     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5090   }
5091   if (!col) {
5092     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5093   }
5094   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5095   PetscFunctionReturn(0);
5096 }
5097 
5098 #undef __FUNCT__
5099 #define __FUNCT__ "MatGetBrowsOfAcols"
5100 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5102 
5103     Collective on Mat
5104 
5105    Input Parameters:
5106 +    A,B - the matrices in mpiaij format
5107 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5108 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5109 
5110    Output Parameter:
5111 +    rowb, colb - index sets of rows and columns of B to extract
5112 -    B_seq - the sequential matrix generated
5113 
5114     Level: developer
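   A typical call sequence might be (a sketch; on the first call the index
   sets are created and returned for reuse; error checking omitted):

.vb
   IS  rowb,colb;
   Mat B_seq;

   MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
   /* ... */
   MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
.ve
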
5115 
5116 @*/
5117 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5118 {
5119   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5120   PetscErrorCode ierr;
5121   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5122   IS             isrowb,iscolb;
5123   Mat            *bseq=NULL;
5124 
5125   PetscFunctionBegin;
5126   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5127     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5128   }
5129   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5130 
5131   if (scall == MAT_INITIAL_MATRIX) {
5132     start = A->cmap->rstart;
5133     cmap  = a->garray;
5134     nzA   = a->A->cmap->n;
5135     nzB   = a->B->cmap->n;
5136     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5137     ncols = 0;
5138     for (i=0; i<nzB; i++) {  /* row < local row index */
5139       if (cmap[i] < start) idx[ncols++] = cmap[i];
5140       else break;
5141     }
5142     imark = i;
5143     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5144     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global columns above the local row range */
5145     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5146     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5147   } else {
5148     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5149     isrowb  = *rowb; iscolb = *colb;
5150     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5151     bseq[0] = *B_seq;
5152   }
5153   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5154   *B_seq = bseq[0];
5155   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5156   if (!rowb) {
5157     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5158   } else {
5159     *rowb = isrowb;
5160   }
5161   if (!colb) {
5162     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5163   } else {
5164     *colb = iscolb;
5165   }
5166   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5167   PetscFunctionReturn(0);
5168 }
5169 
5170 #undef __FUNCT__
5171 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5172 /*
5173     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
5174     of the OFF-DIAGONAL portion of local A
5175 
5176     Collective on Mat
5177 
5178    Input Parameters:
5179 +    A,B - the matrices in mpiaij format
5180 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5181 
5182    Output Parameters:
5183 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5184 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5185 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5186 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5187 
5188     Level: developer
5189 
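   Example usage (a hedged sketch; startsj_s, startsj_r, and bufa are created on
   the MAT_INITIAL_MATRIX call and must be passed back unchanged on reuse):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth = NULL;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);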
5190 */
5191 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5192 {
5193   VecScatter_MPI_General *gen_to,*gen_from;
5194   PetscErrorCode         ierr;
5195   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5196   Mat_SeqAIJ             *b_oth;
5197   VecScatter             ctx =a->Mvctx;
5198   MPI_Comm               comm;
5199   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5200   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5201   PetscScalar            *rvalues,*svalues;
5202   MatScalar              *b_otha,*bufa,*bufA;
5203   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5204   MPI_Request            *rwaits = NULL,*swaits = NULL;
5205   MPI_Status             *sstatus,rstatus;
5206   PetscMPIInt            jj;
5207   PetscInt               *cols,sbs,rbs;
5208   PetscScalar            *vals;
5209 
5210   PetscFunctionBegin;
5211   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5212   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5213     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5214   }
5215   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5216   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5217 
5218   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5219   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5220   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
5221   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
5222   nrecvs   = gen_from->n;
5223   nsends   = gen_to->n;
5224 
5225   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5226   srow    = gen_to->indices;    /* local row index to be sent */
5227   sstarts = gen_to->starts;
5228   sprocs  = gen_to->procs;
5229   sstatus = gen_to->sstatus;
5230   sbs     = gen_to->bs;
5231   rstarts = gen_from->starts;
5232   rprocs  = gen_from->procs;
5233   rbs     = gen_from->bs;
5234 
5235   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5236   if (scall == MAT_INITIAL_MATRIX) {
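    /* Overview of the exchange below: the rows of B needed by this process are
       communicated in three phases over the pattern of the matrix-vector scatter
       ctx: (1) row lengths (the i-array), (2) column indices (the j-array), and,
       for both MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX, (3) numerical values (the
       a-array).  The scatter's PetscScalar buffers rvalues/svalues are reused as
       PetscInt buffers to hold the row lengths. */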
5237     /* i-array */
5238     /*---------*/
5239     /*  post receives */
5240     for (i=0; i<nrecvs; i++) {
5241       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5242       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5243       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5244     }
5245 
5246     /* pack the outgoing message */
5247     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5248 
5249     sstartsj[0] = 0;
5250     rstartsj[0] = 0;
5251     len         = 0; /* total length of j or a array to be sent */
5252     k           = 0;
5253     for (i=0; i<nsends; i++) {
5254       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5255       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5256       for (j=0; j<nrows; j++) {
5257         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5258         for (l=0; l<sbs; l++) {
5259           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5260 
5261           rowlen[j*sbs+l] = ncols;
5262 
5263           len += ncols;
5264           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5265         }
5266         k++;
5267       }
5268       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5269 
5270       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5271     }
5272     /* recvs and sends of i-array are completed */
5273     i = nrecvs;
5274     while (i--) {
5275       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5276     }
5277     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5278 
5279     /* allocate buffers for sending j and a arrays */
5280     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5281     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5282 
5283     /* create i-array of B_oth */
5284     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5285 
5286     b_othi[0] = 0;
5287     len       = 0; /* total length of j or a array to be received */
5288     k         = 0;
5289     for (i=0; i<nrecvs; i++) {
5290       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5291       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5292       for (j=0; j<nrows; j++) {
5293         b_othi[k+1] = b_othi[k] + rowlen[j];
5294         len        += rowlen[j]; k++;
5295       }
5296       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5297     }
5298 
5299     /* allocate space for j and a arrays of B_oth */
5300     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5301     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5302 
5303     /* j-array */
5304     /*---------*/
5305     /*  post receives of j-array */
5306     for (i=0; i<nrecvs; i++) {
5307       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5308       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5309     }
5310 
5311     /* pack the outgoing message j-array */
5312     k = 0;
5313     for (i=0; i<nsends; i++) {
5314       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5315       bufJ  = bufj+sstartsj[i];
5316       for (j=0; j<nrows; j++) {
5317         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5318         for (ll=0; ll<sbs; ll++) {
5319           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5320           for (l=0; l<ncols; l++) {
5321             *bufJ++ = cols[l];
5322           }
5323           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5324         }
5325       }
5326       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5327     }
5328 
5329     /* recvs and sends of j-array are completed */
5330     i = nrecvs;
5331     while (i--) {
5332       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5333     }
5334     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5335   } else if (scall == MAT_REUSE_MATRIX) {
5336     sstartsj = *startsj_s;
5337     rstartsj = *startsj_r;
5338     bufa     = *bufa_ptr;
5339     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5340     b_otha   = b_oth->a;
5341   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatReuse must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5342 
5343   /* a-array */
5344   /*---------*/
5345   /*  post receives of a-array */
5346   for (i=0; i<nrecvs; i++) {
5347     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5348     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5349   }
5350 
5351   /* pack the outgoing message a-array */
5352   k = 0;
5353   for (i=0; i<nsends; i++) {
5354     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5355     bufA  = bufa+sstartsj[i];
5356     for (j=0; j<nrows; j++) {
5357       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5358       for (ll=0; ll<sbs; ll++) {
5359         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5360         for (l=0; l<ncols; l++) {
5361           *bufA++ = vals[l];
5362         }
5363         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5364       }
5365     }
5366     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5367   }
5368   /* recvs and sends of a-array are completed */
5369   i = nrecvs;
5370   while (i--) {
5371     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5372   }
5373   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5374   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5375 
5376   if (scall == MAT_INITIAL_MATRIX) {
5377     /* put together the new matrix */
5378     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5379 
5380     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5381     /* Since these are PETSc arrays, change flags to free them as necessary. */
5382     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5383     b_oth->free_a  = PETSC_TRUE;
5384     b_oth->free_ij = PETSC_TRUE;
5385     b_oth->nonew   = 0;
5386 
5387     ierr = PetscFree(bufj);CHKERRQ(ierr);
5388     if (!startsj_s || !bufa_ptr) {
5389       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5390       ierr = PetscFree(bufa);CHKERRQ(ierr);
5391     } else {
5392       *startsj_s = sstartsj;
5393       *startsj_r = rstartsj;
5394       *bufa_ptr  = bufa;
5395     }
5396   }
5397   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5398   PetscFunctionReturn(0);
5399 }
5400 
5401 #undef __FUNCT__
5402 #define __FUNCT__ "MatGetCommunicationStructs"
5403 /*@C
5404   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5405 
5406   Not Collective
5407 
5408   Input Parameter:
5409 . A - The matrix in mpiaij format
5410 
5411   Output Parameters:
5412 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5413 . colmap - A map from global column index to local index into lvec
5414 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5415 
5416   Level: developer
5417 
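  Example usage (a hedged sketch; the returned objects are borrowed references
  owned by the matrix and must not be destroyed by the caller):
.vb
    Vec        lvec;
    VecScatter scatter;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
.ve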
5418 @*/
5419 #if defined(PETSC_USE_CTABLE)
5420 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5421 #else
5422 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5423 #endif
5424 {
5425   Mat_MPIAIJ *a;
5426 
5427   PetscFunctionBegin;
5428   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5429   PetscValidPointer(lvec, 2);
5430   PetscValidPointer(colmap, 3);
5431   PetscValidPointer(multScatter, 4);
5432   a = (Mat_MPIAIJ*) A->data;
5433   if (lvec) *lvec = a->lvec;
5434   if (colmap) *colmap = a->colmap;
5435   if (multScatter) *multScatter = a->Mvctx;
5436   PetscFunctionReturn(0);
5437 }
5438 
5439 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5440 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5441 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5442 
5443 #undef __FUNCT__
5444 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5445 /*
5446     Computes (B'*A')' since computing B*A directly is untenable
5447 
5448                n                       p                          p
5449         (              )       (              )         (                  )
5450       m (      A       )  *  n (       B      )   =   m (         C        )
5451         (              )       (              )         (                  )
5452 
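    Since a direct MPIDense*MPIAIJ product kernel is not available, the numeric
    routine below forms explicit transposes of A and B, multiplies Bt*At (an
    AIJ*dense product, which is supported), and transposes the result back into C;
    correct, but memory- and communication-intensive.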
5453 */
5454 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5455 {
5456   PetscErrorCode ierr;
5457   Mat            At,Bt,Ct;
5458 
5459   PetscFunctionBegin;
5460   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5461   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5462   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5463   ierr = MatDestroy(&At);CHKERRQ(ierr);
5464   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5465   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5466   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5467   PetscFunctionReturn(0);
5468 }
5469 
5470 #undef __FUNCT__
5471 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5472 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5473 {
5474   PetscErrorCode ierr;
5475   PetscInt       m=A->rmap->n,n=B->cmap->n;
5476   Mat            Cmat;
5477 
5478   PetscFunctionBegin;
5479   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5480   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5481   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5482   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5483   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5484   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5485   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5486   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5487 
5488   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5489 
5490   *C = Cmat;
5491   PetscFunctionReturn(0);
5492 }
5493 
5494 /* ----------------------------------------------------------------*/
5495 #undef __FUNCT__
5496 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5497 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5498 {
5499   PetscErrorCode ierr;
5500 
5501   PetscFunctionBegin;
5502   if (scall == MAT_INITIAL_MATRIX) {
5503     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5504     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5505     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5506   }
5507   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5508   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5509   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5510   PetscFunctionReturn(0);
5511 }
5512 
5513 #if defined(PETSC_HAVE_MUMPS)
5514 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5515 #endif
5516 #if defined(PETSC_HAVE_PASTIX)
5517 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5518 #endif
5519 #if defined(PETSC_HAVE_SUPERLU_DIST)
5520 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5521 #endif
5522 #if defined(PETSC_HAVE_CLIQUE)
5523 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5524 #endif
5525 
5526 /*MC
5527    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5528 
5529    Options Database Keys:
5530 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5531 
5532   Level: beginner
5533 
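  Example usage (a hedged sketch of the standard creation sequence; dnz and onz
  are caller-supplied per-row nonzero estimates for the diagonal and off-diagonal
  blocks):
.vb
    Mat A;
    ierr = MatCreate(comm,&A);CHKERRQ(ierr);
    ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);CHKERRQ(ierr);
.ve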
5534 .seealso: MatCreateAIJ()
5535 M*/
5536 
5537 #undef __FUNCT__
5538 #define __FUNCT__ "MatCreate_MPIAIJ"
5539 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5540 {
5541   Mat_MPIAIJ     *b;
5542   PetscErrorCode ierr;
5543   PetscMPIInt    size;
5544 
5545   PetscFunctionBegin;
5546   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5547 
5548   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5549   B->data       = (void*)b;
5550   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5551   B->assembled  = PETSC_FALSE;
5552   B->insertmode = NOT_SET_VALUES;
5553   b->size       = size;
5554 
5555   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5556 
5557   /* build cache for off array entries formed */
5558   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5559 
5560   b->donotstash  = PETSC_FALSE;
5561   b->colmap      = 0;
5562   b->garray      = 0;
5563   b->roworiented = PETSC_TRUE;
5564 
5565   /* stuff used for matrix vector multiply */
5566   b->lvec  = NULL;
5567   b->Mvctx = NULL;
5568 
5569   /* stuff for MatGetRow() */
5570   b->rowindices   = 0;
5571   b->rowvalues    = 0;
5572   b->getrowactive = PETSC_FALSE;
5573 
5574   /* flexible pointer used in CUSP/CUSPARSE classes */
5575   b->spptr = NULL;
5576 
5577 #if defined(PETSC_HAVE_MUMPS)
5578   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5579 #endif
5580 #if defined(PETSC_HAVE_PASTIX)
5581   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5582 #endif
5583 #if defined(PETSC_HAVE_SUPERLU_DIST)
5584   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5585 #endif
5586 #if defined(PETSC_HAVE_CLIQUE)
5587   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5588 #endif
5589   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5590   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5591   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5592   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5593   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5594   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5595   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5596   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5597   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5598   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5599   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5602   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5603   PetscFunctionReturn(0);
5604 }
5605 
5606 #undef __FUNCT__
5607 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5608 /*@
5609      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5610          and "off-diagonal" part of the matrix in CSR format.
5611 
5612    Collective on MPI_Comm
5613 
5614    Input Parameters:
5615 +  comm - MPI communicator
5616 .  m - number of local rows (cannot be PETSC_DECIDE)
5617 .  n - number of local columns; this should be the same as the local size used in creating the
5618        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5619        calculated if N is given). For square matrices n is almost always m.
5620 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5621 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5622 .   i - row indices for "diagonal" portion of matrix
5623 .   j - column indices
5624 .   a - matrix values
5625 .   oi - row indices for "off-diagonal" portion of matrix
5626 .   oj - column indices
5627 -   oa - matrix values
5628 
5629    Output Parameter:
5630 .   mat - the matrix
5631 
5632    Level: advanced
5633 
5634    Notes:
5635        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5636        must free the arrays once the matrix has been destroyed and not before.
5637 
5638        The i and j indices are 0 based
5639 
5640        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5641 
5642        This sets local rows and cannot be used to set off-processor values.
5643 
5644        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5645        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5646        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5647        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5648        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5649        communication if it is known that only local entries will be set.
5650 
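   Example usage (a hedged sketch; i,j,a and oi,oj,oa are caller-built CSR arrays
   obeying the "diagonal"/"off-diagonal" split described above and must outlive
   the matrix):
.vb
     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
   Only after the MatDestroy() call may the six arrays be freed.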
5651 .keywords: matrix, aij, compressed row, sparse, parallel
5652 
5653 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5654           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5655 @*/
5656 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5657 {
5658   PetscErrorCode ierr;
5659   Mat_MPIAIJ     *maij;
5660 
5661   PetscFunctionBegin;
5662   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5663   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5664   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5665   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5666   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5667   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5668   maij = (Mat_MPIAIJ*) (*mat)->data;
5669 
5670   (*mat)->preallocated = PETSC_TRUE;
5671 
5672   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5673   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5674 
5675   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5676   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5677 
5678   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5679   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5680   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5681   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5682 
5683   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5684   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5685   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5686   PetscFunctionReturn(0);
5687 }
5688 
5689 /*
5690     Special version for direct calls from Fortran
5691 */
5692 #include <petsc-private/fortranimpl.h>
5693 
5694 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5695 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5696 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5697 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5698 #endif
5699 
5700 /* Change these macros so they can be used in a void function */
5701 #undef CHKERRQ
5702 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5703 #undef SETERRQ2
5704 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5705 #undef SETERRQ3
5706 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5707 #undef SETERRQ
5708 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5709 
5710 #undef __FUNCT__
5711 #define __FUNCT__ "matsetvaluesmpiaij_"
5712 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5713 {
5714   Mat            mat  = *mmat;
5715   PetscInt       m    = *mm, n = *mn;
5716   InsertMode     addv = *maddv;
5717   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5718   PetscScalar    value;
5719   PetscErrorCode ierr;
5720 
5721   MatCheckPreallocated(mat,1);
5722   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5723 
5724 #if defined(PETSC_USE_DEBUG)
5725   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5726 #endif
5727   {
5728     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5729     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5730     PetscBool roworiented = aij->roworiented;
5731 
5732     /* Some Variables required in the macro */
5733     Mat        A                 = aij->A;
5734     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5735     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5736     MatScalar  *aa               = a->a;
5737     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5738     Mat        B                 = aij->B;
5739     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5740     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5741     MatScalar  *ba               = b->a;
5742 
5743     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5744     PetscInt  nonew = a->nonew;
5745     MatScalar *ap1,*ap2;
5746 
5747     PetscFunctionBegin;
5748     for (i=0; i<m; i++) {
5749       if (im[i] < 0) continue;
5750 #if defined(PETSC_USE_DEBUG)
5751       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5752 #endif
5753       if (im[i] >= rstart && im[i] < rend) {
5754         row      = im[i] - rstart;
5755         lastcol1 = -1;
5756         rp1      = aj + ai[row];
5757         ap1      = aa + ai[row];
5758         rmax1    = aimax[row];
5759         nrow1    = ailen[row];
5760         low1     = 0;
5761         high1    = nrow1;
5762         lastcol2 = -1;
5763         rp2      = bj + bi[row];
5764         ap2      = ba + bi[row];
5765         rmax2    = bimax[row];
5766         nrow2    = bilen[row];
5767         low2     = 0;
5768         high2    = nrow2;
5769 
5770         for (j=0; j<n; j++) {
5771           if (roworiented) value = v[i*n+j];
5772           else value = v[i+j*m];
5773           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5774           if (in[j] >= cstart && in[j] < cend) {
5775             col = in[j] - cstart;
5776             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5777           } else if (in[j] < 0) continue;
5778 #if defined(PETSC_USE_DEBUG)
5779           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5780 #endif
5781           else {
5782             if (mat->was_assembled) {
5783               if (!aij->colmap) {
5784                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5785               }
5786 #if defined(PETSC_USE_CTABLE)
5787               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5788               col--;
5789 #else
5790               col = aij->colmap[in[j]] - 1;
5791 #endif
5792               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5793                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5794                 col  =  in[j];
5795                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5796                 B     = aij->B;
5797                 b     = (Mat_SeqAIJ*)B->data;
5798                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5799                 rp2   = bj + bi[row];
5800                 ap2   = ba + bi[row];
5801                 rmax2 = bimax[row];
5802                 nrow2 = bilen[row];
5803                 low2  = 0;
5804                 high2 = nrow2;
5805                 bm    = aij->B->rmap->n;
5806                 ba    = b->a;
5807               }
5808             } else col = in[j];
5809             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5810           }
5811         }
5812       } else if (!aij->donotstash) {
5813         if (roworiented) {
5814           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5815         } else {
5816           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5817         }
5818       }
5819     }
5820   }
5821   PetscFunctionReturnVoid();
5822 }
5823 
5824