xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 2e71c61d18e71e694bfbe2b314256bf3aa85754d)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
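
   For example, a minimal usage sketch (nlocal, the local row count, and the
   preallocation guesses 5 and 2 are only illustrative):

      MatCreate(comm,&A);
      MatSetSizes(A,nlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(A,MATAIJ);
      MatSetFromOptions(A);
      MatSeqAIJSetPreallocation(A,5,NULL);
      MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);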
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically
20    switches over to use inodes when enough of them exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
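/*
   Builds an index set (in global numbering) of the locally owned rows of M that contain
   at least one stored nonzero value.  If no process has an all-zero row, *keptrows is
   left NULL to indicate that every row is kept.
*/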
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
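/*
   Builds an index set (in global numbering) of the locally owned rows whose diagonal
   entry is missing or zero, by calling the sequential routine on the diagonal block
   and shifting the result by the ownership range.
*/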
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
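/*
   Computes the 1-, 2-, or infinity-norm of every global column.  Each process
   accumulates the contributions of its diagonal (A) and off-diagonal (B) blocks into a
   work array of global length (off-diagonal columns are mapped through garray), and the
   arrays are then combined with MPI_Allreduce (MAX for the infinity norm, SUM otherwise).
*/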
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
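
    A minimal calling sketch (nlocal, the desired local row count, is illustrative; the
    local counts must sum to the global size of gmat, and gmat is only accessed on rank 0):

       MatDistribute_MPIAIJ(comm,gmat,nlocal,MAT_INITIAL_MATRIX,&newmat);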
182 */
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     if (!rank) {
206       bses[0] = gmat->rmap->bs;
207       bses[1] = gmat->cmap->bs;
208     }
209     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
210     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
211     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
212     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
213     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
214     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
215 
216     rowners[0] = 0;
217     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
218     rstart = rowners[rank];
219     rend   = rowners[rank+1];
220     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
221     if (!rank) {
222       gmata = (Mat_SeqAIJ*) gmat->data;
223       /* send row lengths to all processors */
224       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
225       for (i=1; i<size; i++) {
226         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
227       }
228       /* determine the number of diagonal and off-diagonal entries in each row */
229       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
230       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
231       jj   = 0;
232       for (i=0; i<m; i++) {
233         for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236           jj++;
237         }
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the number of diagonal and off-diagonal entries in each row */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
264       jj   = 0;
265       for (i=0; i<m; i++) {
266         for (j=0; j<dlens[i]; j++) {
267           if (gmataj[jj] < rstart) ld[i]++;
268           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
269           jj++;
270         }
271       }
272       /* receive numerical values */
273       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
274       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
275     }
276     /* set preallocation */
277     for (i=0; i<m; i++) {
278       dlens[i] -= olens[i];
279     }
280     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
281     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
282 
283     for (i=0; i<m; i++) {
284       dlens[i] += olens[i];
285     }
286     cnt = 0;
287     for (i=0; i<m; i++) {
288       row  = rstart + i;
289       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
290       cnt += dlens[i];
291     }
292     if (rank) {
293       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
294     }
295     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
296     ierr = PetscFree(rowners);CHKERRQ(ierr);
297 
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299 
300     *inmat = mat;
301   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
302     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
303     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
304     mat  = *inmat;
305     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
306     if (!rank) {
307       /* send numerical values to other processes */
308       gmata  = (Mat_SeqAIJ*) gmat->data;
309       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
310       gmataa = gmata->a;
311       for (i=1; i<size; i++) {
312         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
313         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
314       }
315       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
316     } else {
317       /* receive numerical values from process 0 */
318       nz   = Ad->nz + Ao->nz;
319       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
320       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
321     }
322     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
323     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
324     ad = Ad->a;
325     ao = Ao->a;
326     if (mat->rmap->n) {
327       i  = 0;
328       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
329       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
330     }
331     for (i=1; i<mat->rmap->n; i++) {
332       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
333       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
334     }
335     i--;
336     if (mat->rmap->n) {
337       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
338     }
339     if (rank) {
340       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
341     }
342   }
343   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 /*
349   Local utility routine that creates a mapping from the global column
350 number to the local number in the off-diagonal part of the local
351 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
352 a slightly higher hash table cost; without it, it is not scalable (each process
353 holds an order-N integer array) but access is fast.
354 */
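
/*
   A sketch of how the map is consulted later (see MatSetValues_MPIAIJ() below); here
   "gcol" is a global column index and "lcol" the corresponding local index into the
   off-diagonal block, which comes back negative when that column is not present:

     PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;       (with PETSC_USE_CTABLE)
     lcol = aij->colmap[gcol] - 1;                            (without)
*/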
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined(PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++) {
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
373   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
374 #endif
375   PetscFunctionReturn(0);
376 }
377 
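/*
   Inserts or adds a single value at (row,col) into the diagonal (A) block.  A short
   binary search followed by a linear scan locates the column within the row; if the
   entry does not exist it is created (subject to the nonew and ignorezeroentries
   settings), reallocating the row storage with MatSeqXAIJReallocateAIJ() when needed.
   The unprefixed variables (rp1, ap1, nrow1, low1, high1, ...) must be set up by the
   caller, as in MatSetValues_MPIAIJ() below.
*/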
378 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
379 { \
380     if (col <= lastcol1)  low1 = 0;     \
381     else                 high1 = nrow1; \
382     lastcol1 = col;\
383     while (high1-low1 > 5) { \
384       t = (low1+high1)/2; \
385       if (rp1[t] > col) high1 = t; \
386       else              low1  = t; \
387     } \
388       for (_i=low1; _i<high1; _i++) { \
389         if (rp1[_i] > col) break; \
390         if (rp1[_i] == col) { \
391           if (addv == ADD_VALUES) ap1[_i] += value;   \
392           else                    ap1[_i] = value; \
393           goto a_noinsert; \
394         } \
395       }  \
396       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
397       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
399       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
400       N = nrow1++ - 1; a->nz++; high1++; \
401       /* shift up all the later entries in this row */ \
402       for (ii=N; ii>=_i; ii--) { \
403         rp1[ii+1] = rp1[ii]; \
404         ap1[ii+1] = ap1[ii]; \
405       } \
406       rp1[_i] = col;  \
407       ap1[_i] = value;  \
408       a_noinsert: ; \
409       ailen[row] = nrow1; \
410 }
411 
412 
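/*
   Same as MatSetValues_SeqAIJ_A_Private() above, but for the off-diagonal (B) block,
   using the caller-provided rp2/ap2/nrow2/low2/high2 variables.
*/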
413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
414   { \
415     if (col <= lastcol2) low2 = 0;                        \
416     else high2 = nrow2;                                   \
417     lastcol2 = col;                                       \
418     while (high2-low2 > 5) {                              \
419       t = (low2+high2)/2;                                 \
420       if (rp2[t] > col) high2 = t;                        \
421       else             low2  = t;                         \
422     }                                                     \
423     for (_i=low2; _i<high2; _i++) {                       \
424       if (rp2[_i] > col) break;                           \
425       if (rp2[_i] == col) {                               \
426         if (addv == ADD_VALUES) ap2[_i] += value;         \
427         else                    ap2[_i] = value;          \
428         goto b_noinsert;                                  \
429       }                                                   \
430     }                                                     \
431     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
432     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
433     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
434     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
435     N = nrow2++ - 1; b->nz++; high2++;                    \
436     /* shift up all the later entries in this row */      \
437     for (ii=N; ii>=_i; ii--) {                            \
438       rp2[ii+1] = rp2[ii];                                \
439       ap2[ii+1] = ap2[ii];                                \
440     }                                                     \
441     rp2[_i] = col;                                        \
442     ap2[_i] = value;                                      \
443     b_noinsert: ;                                         \
444     bilen[row] = nrow2;                                   \
445   }
446 
447 #undef __FUNCT__
448 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
449 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
450 {
451   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
452   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
453   PetscErrorCode ierr;
454   PetscInt       l,*garray = mat->garray,diag;
455 
456   PetscFunctionBegin;
457   /* code only works for square matrices A */
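  /* v is assumed to hold the entire owned row in global column order: first the
     off-diagonal (B) entries to the left of the diagonal block, then the diagonal (A)
     block entries, then the remaining off-diagonal (B) entries to the right */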
458 
459   /* find size of row to the left of the diagonal part */
460   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
461   row  = row - diag;
462   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
463     if (garray[b->j[b->i[row]+l]] > diag) break;
464   }
465   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
466 
467   /* diagonal part */
468   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
469 
470   /* right of diagonal part */
471   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
472   PetscFunctionReturn(0);
473 }
474 
475 #undef __FUNCT__
476 #define __FUNCT__ "MatSetValues_MPIAIJ"
477 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
478 {
479   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
480   PetscScalar    value;
481   PetscErrorCode ierr;
482   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
483   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
484   PetscBool      roworiented = aij->roworiented;
485 
486   /* Some variables required by the macros above */
487   Mat        A                 = aij->A;
488   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
489   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
490   MatScalar  *aa               = a->a;
491   PetscBool  ignorezeroentries = a->ignorezeroentries;
492   Mat        B                 = aij->B;
493   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
494   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
495   MatScalar  *ba               = b->a;
496 
497   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
498   PetscInt  nonew;
499   MatScalar *ap1,*ap2;
500 
501   PetscFunctionBegin;
502   if (v) PetscValidScalarPointer(v,6);
503   for (i=0; i<m; i++) {
504     if (im[i] < 0) continue;
505 #if defined(PETSC_USE_DEBUG)
506     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
507 #endif
508     if (im[i] >= rstart && im[i] < rend) {
509       row      = im[i] - rstart;
510       lastcol1 = -1;
511       rp1      = aj + ai[row];
512       ap1      = aa + ai[row];
513       rmax1    = aimax[row];
514       nrow1    = ailen[row];
515       low1     = 0;
516       high1    = nrow1;
517       lastcol2 = -1;
518       rp2      = bj + bi[row];
519       ap2      = ba + bi[row];
520       rmax2    = bimax[row];
521       nrow2    = bilen[row];
522       low2     = 0;
523       high2    = nrow2;
524 
525       for (j=0; j<n; j++) {
526         if (v) {
527           if (roworiented) value = v[i*n+j];
528           else             value = v[i+j*m];
529         } else value = 0.0;
530         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
531         if (in[j] >= cstart && in[j] < cend) {
532           col   = in[j] - cstart;
533           nonew = a->nonew;
534           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
535         } else if (in[j] < 0) continue;
536 #if defined(PETSC_USE_DEBUG)
537         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
538 #endif
539         else {
540           if (mat->was_assembled) {
541             if (!aij->colmap) {
542               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
543             }
544 #if defined(PETSC_USE_CTABLE)
545             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
546             col--;
547 #else
548             col = aij->colmap[in[j]] - 1;
549 #endif
550             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
551               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
552               col  =  in[j];
553               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
554               B     = aij->B;
555               b     = (Mat_SeqAIJ*)B->data;
556               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
557               rp2   = bj + bi[row];
558               ap2   = ba + bi[row];
559               rmax2 = bimax[row];
560               nrow2 = bilen[row];
561               low2  = 0;
562               high2 = nrow2;
563               bm    = aij->B->rmap->n;
564               ba    = b->a;
565             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
566           } else col = in[j];
567           nonew = b->nonew;
568           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
569         }
570       }
571     } else {
572       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
573       if (!aij->donotstash) {
574         mat->assembled = PETSC_FALSE;
575         if (roworiented) {
576           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
577         } else {
578           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
579         }
580       }
581     }
582   }
583   PetscFunctionReturn(0);
584 }
585 
586 #undef __FUNCT__
587 #define __FUNCT__ "MatGetValues_MPIAIJ"
588 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
589 {
590   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
591   PetscErrorCode ierr;
592   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
593   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
594 
595   PetscFunctionBegin;
596   for (i=0; i<m; i++) {
597     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
598     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
599     if (idxm[i] >= rstart && idxm[i] < rend) {
600       row = idxm[i] - rstart;
601       for (j=0; j<n; j++) {
602         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
603         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
604         if (idxn[j] >= cstart && idxn[j] < cend) {
605           col  = idxn[j] - cstart;
606           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
607         } else {
608           if (!aij->colmap) {
609             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
610           }
611 #if defined(PETSC_USE_CTABLE)
612           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
613           col--;
614 #else
615           col = aij->colmap[idxn[j]] - 1;
616 #endif
617           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
618           else {
619             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
620           }
621         }
622       }
623     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
629 
630 #undef __FUNCT__
631 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
632 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
633 {
634   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
635   PetscErrorCode ierr;
636   PetscInt       nstash,reallocs;
637   InsertMode     addv;
638 
639   PetscFunctionBegin;
640   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
641 
642   /* make sure all processes are in either INSERT_VALUES or ADD_VALUES mode */
643   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
644   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
645   mat->insertmode = addv; /* in case this processor had no cache */
646 
647   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
648   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
649   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
656 {
657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
658   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
659   PetscErrorCode ierr;
660   PetscMPIInt    n;
661   PetscInt       i,j,rstart,ncols,flg;
662   PetscInt       *row,*col;
663   PetscBool      other_disassembled;
664   PetscScalar    *val;
665   InsertMode     addv = mat->insertmode;
666 
667   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
668 
669   PetscFunctionBegin;
670   if (!aij->donotstash && !mat->nooffprocentries) {
671     while (1) {
672       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
673       if (!flg) break;
674 
675       for (i=0; i<n; ) {
676         /* Now identify the consecutive vals belonging to the same row */
677         for (j=i,rstart=row[j]; j<n; j++) {
678           if (row[j] != rstart) break;
679         }
680         if (j < n) ncols = j-i;
681         else       ncols = n-i;
682         /* Now assemble all these values with a single function call */
683         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
684 
685         i = j;
686       }
687     }
688     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
689   }
690   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
691   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
692 
693   /* determine if any process has disassembled; if so we must
694      also disassemble ourselves, in order that we may reassemble. */
695   /*
696      if the nonzero structure of the submatrix B cannot change then we know that
697      no process disassembled, so we can skip this step
698   */
699   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
700     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
701     if (mat->was_assembled && !other_disassembled) {
702       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
703     }
704   }
705   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
706     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
707   }
708   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
709   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
710   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
711 
712   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
713 
714   aij->rowvalues = 0;
715 
716   /* used by MatAXPY() */
717   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
718   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
719 
720   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
721   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
727 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
728 {
729   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
730   PetscErrorCode ierr;
731 
732   PetscFunctionBegin;
733   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
734   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
735   PetscFunctionReturn(0);
736 }
737 
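/*
   Rows to be zeroed may be specified by any process.  A PetscSF whose leaves are the
   requested rows and whose roots are the locally owned rows delivers each request to
   its owner, so every process ends up zeroing only rows it owns; when both x and b are
   provided, b is adjusted so the zeroed equations read diag*x = b.
*/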
738 #undef __FUNCT__
739 #define __FUNCT__ "MatZeroRows_MPIAIJ"
740 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
741 {
742   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
743   PetscInt      *owners = A->rmap->range;
744   PetscInt       n      = A->rmap->n;
745   PetscMPIInt    size   = mat->size;
746   PetscSF        sf;
747   PetscInt      *lrows;
748   PetscSFNode   *rrows;
749   PetscInt       lastidx = -1, r, p = 0, len = 0;
750   PetscErrorCode ierr;
751 
752   PetscFunctionBegin;
753   /* Create SF where leaves are input rows and roots are owned rows */
754   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
755   for (r = 0; r < n; ++r) lrows[r] = -1;
756   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
757   for (r = 0; r < N; ++r) {
758     const PetscInt idx   = rows[r];
759     PetscBool      found = PETSC_FALSE;
760     /* Trick for efficient searching for sorted rows */
761     if (lastidx > idx) p = 0;
762     lastidx = idx;
763     for (; p < size; ++p) {
764       if (idx >= owners[p] && idx < owners[p+1]) {
765         rrows[r].rank  = p;
766         rrows[r].index = rows[r] - owners[p];
767         found = PETSC_TRUE;
768         break;
769       }
770     }
771     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
772   }
773   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
774   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
775   /* Collect flags for rows to be zeroed */
776   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
777   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
778   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
779   /* Compress and put in row numbers */
780   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
781   /* fix right hand side if needed */
782   if (x && b) {
783     const PetscScalar *xx;
784     PetscScalar       *bb;
785 
786     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
787     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
788     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
789     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
790     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
791   }
792   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
793   ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
794   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
795     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
796   } else if (diag != 0.0) {
797     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
798     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
799     for (r = 0; r < len; ++r) {
800       const PetscInt row = lrows[r] + A->rmap->rstart;
801       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
802     }
803     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
804     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
805   } else {
806     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
807   }
808   ierr = PetscFree(lrows);CHKERRQ(ierr);
809   PetscFunctionReturn(0);
810 }
811 
812 #undef __FUNCT__
813 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
814 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
815 {
816   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
817   PetscErrorCode    ierr;
818   PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
819   PetscInt          i,j,r,m,p = 0,len = 0;
820   PetscInt          *lrows,*owners = A->rmap->range;
821   PetscSFNode       *rrows;
822   PetscSF           sf;
823   const PetscScalar *xx;
824   PetscScalar       *bb,*mask;
825   Vec               xmask,lmask;
826   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
827   const PetscInt    *aj, *ii,*ridx;
828   PetscScalar       *aa;
829 #if defined(PETSC_DEBUG)
830   PetscBool found = PETSC_FALSE;
831 #endif
832 
833   PetscFunctionBegin;
834   /* Create SF where leaves are input rows and roots are owned rows */
835   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
836   for (r = 0; r < n; ++r) lrows[r] = -1;
837   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
838   for (r = 0; r < N; ++r) {
839     const PetscInt idx   = rows[r];
840     PetscBool      found = PETSC_FALSE;
841     /* Trick for efficient searching for sorted rows */
842     if (lastidx > idx) p = 0;
843     lastidx = idx;
844     for (; p < size; ++p) {
845       if (idx >= owners[p] && idx < owners[p+1]) {
846         rrows[r].rank  = p;
847         rrows[r].index = rows[r] - owners[p];
848         found = PETSC_TRUE;
849         break;
850       }
851     }
852     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
853   }
854   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
855   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
856   /* Collect flags for rows to be zeroed */
857   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
858   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
859   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
860   /* Compress and put in row numbers */
861   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
862   /* zero diagonal part of matrix */
863   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
864   /* handle off diagonal part of matrix */
865   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
866   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
867   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
868   for (i=0; i<len; i++) bb[lrows[i]] = 1;
869   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
870   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
873   if (x) {
874     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
875     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
876     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
877     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
878   }
879   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
880   /* remove zeroed rows of off diagonal matrix */
881   ii = aij->i;
882   for (i=0; i<len; i++) {
883     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
884   }
885   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
886   if (aij->compressedrow.use) {
887     m    = aij->compressedrow.nrows;
888     ii   = aij->compressedrow.i;
889     ridx = aij->compressedrow.rindex;
890     for (i=0; i<m; i++) {
891       n  = ii[i+1] - ii[i];
892       aj = aij->j + ii[i];
893       aa = aij->a + ii[i];
894 
895       for (j=0; j<n; j++) {
896         if (PetscAbsScalar(mask[*aj])) {
897           if (b) bb[*ridx] -= *aa*xx[*aj];
898           *aa = 0.0;
899         }
900         aa++;
901         aj++;
902       }
903       ridx++;
904     }
905   } else { /* do not use compressed row format */
906     m = l->B->rmap->n;
907     for (i=0; i<m; i++) {
908       n  = ii[i+1] - ii[i];
909       aj = aij->j + ii[i];
910       aa = aij->a + ii[i];
911       for (j=0; j<n; j++) {
912         if (PetscAbsScalar(mask[*aj])) {
913           if (b) bb[i] -= *aa*xx[*aj];
914           *aa = 0.0;
915         }
916         aa++;
917         aj++;
918       }
919     }
920   }
921   if (x) {
922     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
923     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
924   }
925   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
926   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
927   ierr = PetscFree(lrows);CHKERRQ(ierr);
928   PetscFunctionReturn(0);
929 }
930 
931 #undef __FUNCT__
932 #define __FUNCT__ "MatMult_MPIAIJ"
933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934 {
935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       nt;
938 
939   PetscFunctionBegin;
940   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
941   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 #undef __FUNCT__
950 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
951 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
952 {
953   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
954   PetscErrorCode ierr;
955 
956   PetscFunctionBegin;
957   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
958   PetscFunctionReturn(0);
959 }
960 
961 #undef __FUNCT__
962 #define __FUNCT__ "MatMultAdd_MPIAIJ"
963 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
964 {
965   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
966   PetscErrorCode ierr;
967 
968   PetscFunctionBegin;
969   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
970   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
971   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
972   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
973   PetscFunctionReturn(0);
974 }
975 
976 #undef __FUNCT__
977 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
978 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
979 {
980   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
981   PetscErrorCode ierr;
982   PetscBool      merged;
983 
984   PetscFunctionBegin;
985   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
986   /* do nondiagonal part */
987   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
988   if (!merged) {
989     /* send it on its way */
990     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
991     /* do local part */
992     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
993     /* receive remote parts: note this assumes the values are not actually */
994     /* added into yy until the next line */
995     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
996   } else {
997     /* do local part */
998     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
999     /* send it on its way */
1000     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1001     /* values actually were received in the Begin() but we need to call this nop */
1002     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1003   }
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1009 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1010 {
1011   MPI_Comm       comm;
1012   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1013   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1014   IS             Me,Notme;
1015   PetscErrorCode ierr;
1016   PetscInt       M,N,first,last,*notme,i;
1017   PetscMPIInt    size;
1018 
1019   PetscFunctionBegin;
1020   /* Easy test: the diagonal blocks must be transposes of each other */
1021   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1022   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1023   if (!*f) PetscFunctionReturn(0);
1024   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1025   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1026   if (size == 1) PetscFunctionReturn(0);
1027 
1028   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
1029   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1030   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1031   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1032   for (i=0; i<first; i++) notme[i] = i;
1033   for (i=last; i<M; i++) notme[i-last+first] = i;
1034   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1035   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1036   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1037   Aoff = Aoffs[0];
1038   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1039   Boff = Boffs[0];
1040   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1041   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1042   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1043   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1044   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1045   ierr = PetscFree(notme);CHKERRQ(ierr);
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 #undef __FUNCT__
1050 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1051 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1052 {
1053   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1054   PetscErrorCode ierr;
1055 
1056   PetscFunctionBegin;
1057   /* do nondiagonal part */
1058   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1059   /* send it on its way */
1060   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1061   /* do local part */
1062   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1063   /* receive remote parts */
1064   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 /*
1069   This only works correctly for square matrices where the subblock A->A is the
1070    diagonal block
1071 */
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1074 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1075 {
1076   PetscErrorCode ierr;
1077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1078 
1079   PetscFunctionBegin;
1080   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1081   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1082   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatScale_MPIAIJ"
1088 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1089 {
1090   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1091   PetscErrorCode ierr;
1092 
1093   PetscFunctionBegin;
1094   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1095   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatDestroy_MPIAIJ"
1101 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1102 {
1103   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1104   PetscErrorCode ierr;
1105 
1106   PetscFunctionBegin;
1107 #if defined(PETSC_USE_LOG)
1108   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1109 #endif
1110   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1111   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1112   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1113   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1114 #if defined(PETSC_USE_CTABLE)
1115   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1116 #else
1117   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1118 #endif
1119   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1120   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1121   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1122   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1123   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1124   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1125 
1126   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1133   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1134   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 #undef __FUNCT__
1139 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1140 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1141 {
1142   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1143   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1144   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1145   PetscErrorCode ierr;
1146   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1147   int            fd;
1148   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1149   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1150   PetscScalar    *column_values;
1151   PetscInt       message_count,flowcontrolcount;
1152   FILE           *file;
1153 
1154   PetscFunctionBegin;
1155   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1156   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1157   nz   = A->nz + B->nz;
1158   if (!rank) {
1159     header[0] = MAT_FILE_CLASSID;
1160     header[1] = mat->rmap->N;
1161     header[2] = mat->cmap->N;
1162 
1163     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1164     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1165     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1166     /* get largest number of rows any processor has */
1167     rlen  = mat->rmap->n;
1168     range = mat->rmap->range;
1169     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1170   } else {
1171     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1172     rlen = mat->rmap->n;
1173   }
1174 
1175   /* load up the local row counts */
1176   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1177   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1178 
1179   /* store the row lengths to the file */
1180   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1181   if (!rank) {
1182     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1183     for (i=1; i<size; i++) {
1184       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1185       rlen = range[i+1] - range[i];
1186       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1187       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1188     }
1189     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1190   } else {
1191     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1192     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1193     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1194   }
1195   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1196 
1197   /* load up the local column indices */
1198   nzmax = nz; /* process 0 needs space as large as the largest process needs */
1199   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1200   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1201   cnt   = 0;
1202   for (i=0; i<mat->rmap->n; i++) {
1203     for (j=B->i[i]; j<B->i[i+1]; j++) {
1204       if ((col = garray[B->j[j]]) > cstart) break;
1205       column_indices[cnt++] = col;
1206     }
1207     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1208     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1209   }
1210   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1211 
1212   /* store the column indices to the file */
1213   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1214   if (!rank) {
1215     MPI_Status status;
1216     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1217     for (i=1; i<size; i++) {
1218       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1219       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1220       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1221       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1222       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1223     }
1224     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1225   } else {
1226     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1227     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1228     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1229     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1230   }
1231   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1232 
1233   /* load up the local column values */
1234   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1235   cnt  = 0;
1236   for (i=0; i<mat->rmap->n; i++) {
1237     for (j=B->i[i]; j<B->i[i+1]; j++) {
1238       if (garray[B->j[j]] > cstart) break;
1239       column_values[cnt++] = B->a[j];
1240     }
1241     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1242     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1243   }
1244   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1245 
1246   /* store the column values to the file */
1247   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1248   if (!rank) {
1249     MPI_Status status;
1250     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1251     for (i=1; i<size; i++) {
1252       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1253       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1254       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1255       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1256       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1257     }
1258     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1259   } else {
1260     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1261     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1262     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1263     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1264   }
1265   ierr = PetscFree(column_values);CHKERRQ(ierr);
1266 
1267   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1268   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1269   PetscFunctionReturn(0);
1270 }
1271 
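/*
   A minimal usage sketch (illustrative only, not part of the library): how the binary dump
   produced by MatView_MPIAIJ_Binary() above is typically created and read back from
   application code. The matrix A and the file name "matrix.dat" are placeholders.

     PetscViewer viewer;
     Mat         B;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);          // dispatches to MatView_MPIAIJ_Binary() for a parallel AIJ matrix
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATAIJ);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
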
1272 #include <petscdraw.h>
1273 #undef __FUNCT__
1274 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1275 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1276 {
1277   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1278   PetscErrorCode    ierr;
1279   PetscMPIInt       rank = aij->rank,size = aij->size;
1280   PetscBool         isdraw,iascii,isbinary;
1281   PetscViewer       sviewer;
1282   PetscViewerFormat format;
1283 
1284   PetscFunctionBegin;
1285   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1286   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1287   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1288   if (iascii) {
1289     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1290     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1291       MatInfo   info;
1292       PetscBool inodes;
1293 
1294       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1295       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1296       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1297       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1298       if (!inodes) {
1299         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1300                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1301       } else {
1302         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1303                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1304       }
1305       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1306       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1307       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1308       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1309       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1310       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1311       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1312       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1313       PetscFunctionReturn(0);
1314     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1315       PetscInt inodecount,inodelimit,*inodes;
1316       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1317       if (inodes) {
1318         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1319       } else {
1320         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1321       }
1322       PetscFunctionReturn(0);
1323     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1324       PetscFunctionReturn(0);
1325     }
1326   } else if (isbinary) {
1327     if (size == 1) {
1328       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1329       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1330     } else {
1331       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1332     }
1333     PetscFunctionReturn(0);
1334   } else if (isdraw) {
1335     PetscDraw draw;
1336     PetscBool isnull;
1337     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1338     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1339   }
1340 
1341   if (size == 1) {
1342     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1343     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1344   } else {
1345     /* assemble the entire matrix onto the first processor */
1346     Mat        A;
1347     Mat_SeqAIJ *Aloc;
1348     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1349     MatScalar  *a;
1350 
1351     if (mat->rmap->N > 1024) {
1352       PetscBool flg = PETSC_FALSE;
1353 
1354       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1355       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1356     }
1357 
1358     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1359     if (!rank) {
1360       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1361     } else {
1362       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1363     }
1364     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1365     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1366     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1367     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1368     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1369 
1370     /* copy over the A part */
1371     Aloc = (Mat_SeqAIJ*)aij->A->data;
1372     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1373     row  = mat->rmap->rstart;
1374     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1375     for (i=0; i<m; i++) {
1376       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1377       row++;
1378       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1379     }
1380     aj = Aloc->j;
1381     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1382 
1383     /* copy over the B part */
1384     Aloc = (Mat_SeqAIJ*)aij->B->data;
1385     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1386     row  = mat->rmap->rstart;
1387     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1388     ct   = cols;
1389     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1390     for (i=0; i<m; i++) {
1391       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1392       row++;
1393       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1394     }
1395     ierr = PetscFree(ct);CHKERRQ(ierr);
1396     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1397     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1398     /*
1399        Every process must participate in the call that draws the matrix, since the graphics
1400        waits are synchronized across all processes that share the PetscDraw object
1401     */
1402     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1403     if (!rank) {
1404       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1405       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1406       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1407       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1408     }
1409     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1410     ierr = MatDestroy(&A);CHKERRQ(ierr);
1411   }
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatView_MPIAIJ"
1417 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1418 {
1419   PetscErrorCode ierr;
1420   PetscBool      iascii,isdraw,issocket,isbinary;
1421 
1422   PetscFunctionBegin;
1423   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1424   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1425   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1426   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1427   if (iascii || isdraw || isbinary || issocket) {
1428     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1429   }
1430   PetscFunctionReturn(0);
1431 }
1432 
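/*
   A minimal usage sketch (illustrative only, not part of the library): requesting the
   INFO_DETAIL output handled by MatView_MPIAIJ_ASCIIorDraworSocket() above. A is a
   placeholder for an assembled MPIAIJ matrix; the same output can also be requested
   through the -mat_view options database keys.

     PetscViewer viewer = PETSC_VIEWER_STDOUT_WORLD;

     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
*/
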
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatSOR_MPIAIJ"
1435 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1436 {
1437   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1438   PetscErrorCode ierr;
1439   Vec            bb1 = 0;
1440   PetscBool      hasop;
1441 
1442   PetscFunctionBegin;
1443   if (flag == SOR_APPLY_UPPER) {
1444     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1445     PetscFunctionReturn(0);
1446   }
1447 
1448   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1449     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1450   }
1451 
1452   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1453     if (flag & SOR_ZERO_INITIAL_GUESS) {
1454       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1455       its--;
1456     }
1457 
1458     while (its--) {
1459       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1460       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1461 
1462       /* update rhs: bb1 = bb - B*x */
1463       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1464       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1465 
1466       /* local sweep */
1467       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1468     }
1469   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1470     if (flag & SOR_ZERO_INITIAL_GUESS) {
1471       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1472       its--;
1473     }
1474     while (its--) {
1475       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1476       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1477 
1478       /* update rhs: bb1 = bb - B*x */
1479       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1480       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1481 
1482       /* local sweep */
1483       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1484     }
1485   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1486     if (flag & SOR_ZERO_INITIAL_GUESS) {
1487       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1488       its--;
1489     }
1490     while (its--) {
1491       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1492       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1493 
1494       /* update rhs: bb1 = bb - B*x */
1495       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1496       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1497 
1498       /* local sweep */
1499       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1500     }
1501   } else if (flag & SOR_EISENSTAT) {
1502     Vec xx1;
1503 
1504     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1505     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1506 
1507     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1508     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1509     if (!mat->diag) {
1510       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1511       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1512     }
1513     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1514     if (hasop) {
1515       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1516     } else {
1517       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1518     }
1519     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1520 
1521     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1522 
1523     /* local sweep */
1524     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1525     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1526     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1527   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1528 
1529   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 
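/*
   A minimal usage sketch (illustrative only, not part of the library): invoking the parallel
   SOR kernel above through the public MatSOR() interface, here for one symmetric local sweep
   with a zero initial guess. A and the work vectors created from it are placeholders.

     Vec x,b;

     ierr = MatGetVecs(A,&x,&b);CHKERRQ(ierr);
     ierr = VecSet(b,1.0);CHKERRQ(ierr);
     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
     ierr = VecDestroy(&x);CHKERRQ(ierr);
     ierr = VecDestroy(&b);CHKERRQ(ierr);

   In practice this path is normally reached through PCSOR (-pc_type sor), which selects the
   local sweep type via PCSORSetSymmetric().
*/
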
1533 #undef __FUNCT__
1534 #define __FUNCT__ "MatPermute_MPIAIJ"
1535 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1536 {
1537   Mat            aA,aB,Aperm;
1538   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1539   PetscScalar    *aa,*ba;
1540   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1541   PetscSF        rowsf,sf;
1542   IS             parcolp = NULL;
1543   PetscBool      done;
1544   PetscErrorCode ierr;
1545 
1546   PetscFunctionBegin;
1547   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1548   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1549   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1550   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1551 
1552   /* Invert row permutation to find out where my rows should go */
1553   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1554   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1555   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1556   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1557   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1558   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1559 
1560   /* Invert column permutation to find out where my columns should go */
1561   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1562   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1563   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1564   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1565   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1566   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1567   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1568 
1569   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1570   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1571   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1572 
1573   /* Find out where my gcols should go */
1574   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1575   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1576   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1577   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1578   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1579   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1580   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1581   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1582 
1583   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1584   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1585   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1586   for (i=0; i<m; i++) {
1587     PetscInt row = rdest[i],rowner;
1588     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1589     for (j=ai[i]; j<ai[i+1]; j++) {
1590       PetscInt cowner,col = cdest[aj[j]];
1591       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1592       if (rowner == cowner) dnnz[i]++;
1593       else onnz[i]++;
1594     }
1595     for (j=bi[i]; j<bi[i+1]; j++) {
1596       PetscInt cowner,col = gcdest[bj[j]];
1597       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1598       if (rowner == cowner) dnnz[i]++;
1599       else onnz[i]++;
1600     }
1601   }
1602   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1603   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1604   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1605   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1606   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1607 
1608   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1609   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1610   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1611   for (i=0; i<m; i++) {
1612     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1613     PetscInt rowlen;
1614     rowlen = ai[i+1] - ai[i];
1615     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1616     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1617     rowlen = bi[i+1] - bi[i];
1618     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1619     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1620   }
1621   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1622   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1623   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1624   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1625   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1626   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1627   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1628   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1629   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1630   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1631   *B = Aperm;
1632   PetscFunctionReturn(0);
1633 }
1634 
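/*
   A minimal usage sketch (illustrative only, not part of the library): calling the routine
   above through MatPermute() with explicit index sets. The identity permutation is used here
   as a placeholder; any parallel ISs with matching local row/column sizes work the same way.

     IS       rowperm,colperm;
     Mat      B;
     PetscInt m,n,rstart,cstart;

     ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,NULL);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,m,rstart,1,&rowperm);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,n,cstart,1,&colperm);CHKERRQ(ierr);
     ierr = MatPermute(A,rowperm,colperm,&B);CHKERRQ(ierr);
     ierr = ISDestroy(&rowperm);CHKERRQ(ierr);
     ierr = ISDestroy(&colperm);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/
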
1635 #undef __FUNCT__
1636 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1637 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1638 {
1639   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1640   Mat            A    = mat->A,B = mat->B;
1641   PetscErrorCode ierr;
1642   PetscReal      isend[5],irecv[5];
1643 
1644   PetscFunctionBegin;
1645   info->block_size = 1.0;
1646   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1647 
1648   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1649   isend[3] = info->memory;  isend[4] = info->mallocs;
1650 
1651   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1652 
1653   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1654   isend[3] += info->memory;  isend[4] += info->mallocs;
1655   if (flag == MAT_LOCAL) {
1656     info->nz_used      = isend[0];
1657     info->nz_allocated = isend[1];
1658     info->nz_unneeded  = isend[2];
1659     info->memory       = isend[3];
1660     info->mallocs      = isend[4];
1661   } else if (flag == MAT_GLOBAL_MAX) {
1662     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1663 
1664     info->nz_used      = irecv[0];
1665     info->nz_allocated = irecv[1];
1666     info->nz_unneeded  = irecv[2];
1667     info->memory       = irecv[3];
1668     info->mallocs      = irecv[4];
1669   } else if (flag == MAT_GLOBAL_SUM) {
1670     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1671 
1672     info->nz_used      = irecv[0];
1673     info->nz_allocated = irecv[1];
1674     info->nz_unneeded  = irecv[2];
1675     info->memory       = irecv[3];
1676     info->mallocs      = irecv[4];
1677   }
1678   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1679   info->fill_ratio_needed = 0;
1680   info->factor_mallocs    = 0;
1681   PetscFunctionReturn(0);
1682 }
1683 
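/*
   A minimal usage sketch (illustrative only, not part of the library): querying the statistics
   assembled by MatGetInfo_MPIAIJ() above from application code; A is a placeholder.

     MatInfo info;

     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g, allocated %g, mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/
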
1684 #undef __FUNCT__
1685 #define __FUNCT__ "MatSetOption_MPIAIJ"
1686 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1687 {
1688   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1689   PetscErrorCode ierr;
1690 
1691   PetscFunctionBegin;
1692   switch (op) {
1693   case MAT_NEW_NONZERO_LOCATIONS:
1694   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1695   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1696   case MAT_KEEP_NONZERO_PATTERN:
1697   case MAT_NEW_NONZERO_LOCATION_ERR:
1698   case MAT_USE_INODES:
1699   case MAT_IGNORE_ZERO_ENTRIES:
1700     MatCheckPreallocated(A,1);
1701     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1702     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1703     break;
1704   case MAT_ROW_ORIENTED:
1705     a->roworiented = flg;
1706 
1707     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1708     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1709     break;
1710   case MAT_NEW_DIAGONALS:
1711     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1712     break;
1713   case MAT_IGNORE_OFF_PROC_ENTRIES:
1714     a->donotstash = flg;
1715     break;
1716   case MAT_SPD:
1717     A->spd_set = PETSC_TRUE;
1718     A->spd     = flg;
1719     if (flg) {
1720       A->symmetric                  = PETSC_TRUE;
1721       A->structurally_symmetric     = PETSC_TRUE;
1722       A->symmetric_set              = PETSC_TRUE;
1723       A->structurally_symmetric_set = PETSC_TRUE;
1724     }
1725     break;
1726   case MAT_SYMMETRIC:
1727     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1728     break;
1729   case MAT_STRUCTURALLY_SYMMETRIC:
1730     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1731     break;
1732   case MAT_HERMITIAN:
1733     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1734     break;
1735   case MAT_SYMMETRY_ETERNAL:
1736     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1737     break;
1738   default:
1739     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1740   }
1741   PetscFunctionReturn(0);
1742 }
1743 
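/*
   A minimal usage sketch (illustrative only, not part of the library): the options handled
   above are normally set between preallocation and assembly; A is a placeholder.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);  // handled locally: sets a->donotstash
     ierr = MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);     // forwarded to both a->A and a->B
     ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);                      // also marks the matrix symmetric
*/
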
1744 #undef __FUNCT__
1745 #define __FUNCT__ "MatGetRow_MPIAIJ"
1746 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1747 {
1748   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1749   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1750   PetscErrorCode ierr;
1751   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1752   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1753   PetscInt       *cmap,*idx_p;
1754 
1755   PetscFunctionBegin;
1756   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1757   mat->getrowactive = PETSC_TRUE;
1758 
1759   if (!mat->rowvalues && (idx || v)) {
1760     /*
1761         allocate enough space to hold information from the longest row.
1762     */
1763     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1764     PetscInt   max = 1,tmp;
1765     for (i=0; i<matin->rmap->n; i++) {
1766       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1767       if (max < tmp) max = tmp;
1768     }
1769     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1770   }
1771 
1772   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1773   lrow = row - rstart;
1774 
1775   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1776   if (!v)   {pvA = 0; pvB = 0;}
1777   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1778   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1779   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1780   nztot = nzA + nzB;
1781 
1782   cmap = mat->garray;
1783   if (v  || idx) {
1784     if (nztot) {
1785       /* Sort by increasing column numbers, assuming A and B already sorted */
1786       PetscInt imark = -1;
1787       if (v) {
1788         *v = v_p = mat->rowvalues;
1789         for (i=0; i<nzB; i++) {
1790           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1791           else break;
1792         }
1793         imark = i;
1794         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1795         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1796       }
1797       if (idx) {
1798         *idx = idx_p = mat->rowindices;
1799         if (imark > -1) {
1800           for (i=0; i<imark; i++) {
1801             idx_p[i] = cmap[cworkB[i]];
1802           }
1803         } else {
1804           for (i=0; i<nzB; i++) {
1805             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1806             else break;
1807           }
1808           imark = i;
1809         }
1810         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1811         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1812       }
1813     } else {
1814       if (idx) *idx = 0;
1815       if (v)   *v   = 0;
1816     }
1817   }
1818   *nz  = nztot;
1819   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1820   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 #undef __FUNCT__
1825 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1826 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1827 {
1828   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1829 
1830   PetscFunctionBegin;
1831   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1832   aij->getrowactive = PETSC_FALSE;
1833   PetscFunctionReturn(0);
1834 }
1835 
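/*
   A minimal usage sketch (illustrative only, not part of the library): the customary
   MatGetRow()/MatRestoreRow() loop over the locally owned rows that ends up in the two
   routines above; A is a placeholder.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       // ... use cols[0..ncols-1] (global column indices, sorted) and vals[0..ncols-1] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
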
1836 #undef __FUNCT__
1837 #define __FUNCT__ "MatNorm_MPIAIJ"
1838 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1839 {
1840   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1841   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1842   PetscErrorCode ierr;
1843   PetscInt       i,j,cstart = mat->cmap->rstart;
1844   PetscReal      sum = 0.0;
1845   MatScalar      *v;
1846 
1847   PetscFunctionBegin;
1848   if (aij->size == 1) {
1849     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1850   } else {
1851     if (type == NORM_FROBENIUS) {
1852       v = amat->a;
1853       for (i=0; i<amat->nz; i++) {
1854         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1855       }
1856       v = bmat->a;
1857       for (i=0; i<bmat->nz; i++) {
1858         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1859       }
1860       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1861       *norm = PetscSqrtReal(*norm);
1862     } else if (type == NORM_1) { /* max column norm */
1863       PetscReal *tmp,*tmp2;
1864       PetscInt  *jj,*garray = aij->garray;
1865       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1866       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1867       *norm = 0.0;
1868       v     = amat->a; jj = amat->j;
1869       for (j=0; j<amat->nz; j++) {
1870         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1871       }
1872       v = bmat->a; jj = bmat->j;
1873       for (j=0; j<bmat->nz; j++) {
1874         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1875       }
1876       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1877       for (j=0; j<mat->cmap->N; j++) {
1878         if (tmp2[j] > *norm) *norm = tmp2[j];
1879       }
1880       ierr = PetscFree(tmp);CHKERRQ(ierr);
1881       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1882     } else if (type == NORM_INFINITY) { /* max row norm */
1883       PetscReal ntemp = 0.0;
1884       for (j=0; j<aij->A->rmap->n; j++) {
1885         v   = amat->a + amat->i[j];
1886         sum = 0.0;
1887         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1888           sum += PetscAbsScalar(*v); v++;
1889         }
1890         v = bmat->a + bmat->i[j];
1891         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1892           sum += PetscAbsScalar(*v); v++;
1893         }
1894         if (sum > ntemp) ntemp = sum;
1895       }
1896       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1897     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1898   }
1899   PetscFunctionReturn(0);
1900 }
1901 
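/*
   A minimal usage sketch (illustrative only, not part of the library): the three norms
   supported by MatNorm_MPIAIJ() above; NORM_2 is not available for this type. A is a placeholder.

     PetscReal nrm1,nrmf,nrminf;

     ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);            // max column sum
     ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrminf);CHKERRQ(ierr);   // max row sum
*/
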
1902 #undef __FUNCT__
1903 #define __FUNCT__ "MatTranspose_MPIAIJ"
1904 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1905 {
1906   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1907   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1908   PetscErrorCode ierr;
1909   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1910   PetscInt       cstart = A->cmap->rstart,ncol;
1911   Mat            B;
1912   MatScalar      *array;
1913 
1914   PetscFunctionBegin;
1915   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1916 
1917   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1918   ai = Aloc->i; aj = Aloc->j;
1919   bi = Bloc->i; bj = Bloc->j;
1920   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1921     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1922     PetscSFNode          *oloc;
1923     PETSC_UNUSED PetscSF sf;
1924 
1925     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1926     /* compute d_nnz for preallocation */
1927     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1928     for (i=0; i<ai[ma]; i++) {
1929       d_nnz[aj[i]]++;
1930       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1931     }
1932     /* compute local off-diagonal contributions */
1933     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1934     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1935     /* map those to global */
1936     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1937     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1938     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1939     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1940     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1941     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1942     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1943 
1944     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1945     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1946     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
1947     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1948     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1949     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1950   } else {
1951     B    = *matout;
1952     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1953     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1954   }
1955 
1956   /* copy over the A part */
1957   array = Aloc->a;
1958   row   = A->rmap->rstart;
1959   for (i=0; i<ma; i++) {
1960     ncol = ai[i+1]-ai[i];
1961     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1962     row++;
1963     array += ncol; aj += ncol;
1964   }
1965   aj = Aloc->j;
1966   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1967 
1968   /* copy over the B part */
1969   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1970   array = Bloc->a;
1971   row   = A->rmap->rstart;
1972   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1973   cols_tmp = cols;
1974   for (i=0; i<mb; i++) {
1975     ncol = bi[i+1]-bi[i];
1976     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1977     row++;
1978     array += ncol; cols_tmp += ncol;
1979   }
1980   ierr = PetscFree(cols);CHKERRQ(ierr);
1981 
1982   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1983   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1984   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1985     *matout = B;
1986   } else {
1987     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1988   }
1989   PetscFunctionReturn(0);
1990 }
1991 
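/*
   A minimal usage sketch (illustrative only, not part of the library): out-of-place and
   in-place use of the transpose routine above; A is a placeholder, and the in-place form
   requires a square matrix.

     Mat At;

     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatDestroy(&At);CHKERRQ(ierr);

     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);   // A <- A^T, square matrices only
*/
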
1992 #undef __FUNCT__
1993 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1994 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1995 {
1996   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1997   Mat            a    = aij->A,b = aij->B;
1998   PetscErrorCode ierr;
1999   PetscInt       s1,s2,s3;
2000 
2001   PetscFunctionBegin;
2002   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2003   if (rr) {
2004     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2005     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2006     /* Overlap communication with computation. */
2007     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2008   }
2009   if (ll) {
2010     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2011     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2012     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2013   }
2014   /* scale the diagonal block */
2015   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2016 
2017   if (rr) {
2018     /* Do a scatter end and then right scale the off-diagonal block */
2019     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2020     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2021   }
2022   PetscFunctionReturn(0);
2023 }
2024 
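/*
   A minimal usage sketch (illustrative only, not part of the library): left/right diagonal
   scaling through the routine above, i.e. A <- diag(l) * A * diag(r); A is a placeholder.

     Vec l,r;

     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);      // r matches the column layout, l the row layout
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);
*/
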
2025 #undef __FUNCT__
2026 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2027 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2028 {
2029   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2030   PetscErrorCode ierr;
2031 
2032   PetscFunctionBegin;
2033   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 #undef __FUNCT__
2038 #define __FUNCT__ "MatEqual_MPIAIJ"
2039 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2040 {
2041   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2042   Mat            a,b,c,d;
2043   PetscBool      flg;
2044   PetscErrorCode ierr;
2045 
2046   PetscFunctionBegin;
2047   a = matA->A; b = matA->B;
2048   c = matB->A; d = matB->B;
2049 
2050   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2051   if (flg) {
2052     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2053   }
2054   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2055   PetscFunctionReturn(0);
2056 }
2057 
2058 #undef __FUNCT__
2059 #define __FUNCT__ "MatCopy_MPIAIJ"
2060 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2061 {
2062   PetscErrorCode ierr;
2063   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2064   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2065 
2066   PetscFunctionBegin;
2067   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2068   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2069     /* Because of the column compression in the off-process part of the matrix a->B,
2070        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2071        directly on the two parts. If needed, a copy more efficient than MatCopy_Basic()
2072        could be provided by first uncompressing the a->B matrices and then copying
2073        the submatrices */
2074     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2075   } else {
2076     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2077     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2078   }
2079   PetscFunctionReturn(0);
2080 }
2081 
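/*
   A minimal usage sketch (illustrative only, not part of the library): the fast path above
   requires matching nonzero patterns, which is most easily guaranteed by duplicating first;
   A is a placeholder.

     Mat B;

     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/
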
2082 #undef __FUNCT__
2083 #define __FUNCT__ "MatSetUp_MPIAIJ"
2084 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2085 {
2086   PetscErrorCode ierr;
2087 
2088   PetscFunctionBegin;
2089   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 #undef __FUNCT__
2094 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2095 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2096 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2097 {
2098   PetscInt       i,m=Y->rmap->N;
2099   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2100   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2101   const PetscInt *xi = x->i,*yi = y->i;
2102 
2103   PetscFunctionBegin;
2104   /* Compute the number of nonzeros per row of the sum, used to preallocate the new matrix */
2105   for (i=0; i<m; i++) {
2106     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2107     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2108     nnz[i] = 0;
2109     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2110       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2111       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2112       nnz[i]++;
2113     }
2114     for (; k<nzy; k++) nnz[i]++;
2115   }
2116   PetscFunctionReturn(0);
2117 }
2118 
2119 #undef __FUNCT__
2120 #define __FUNCT__ "MatAXPY_MPIAIJ"
2121 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2122 {
2123   PetscErrorCode ierr;
2124   PetscInt       i;
2125   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2126   PetscBLASInt   bnz,one=1;
2127   Mat_SeqAIJ     *x,*y;
2128 
2129   PetscFunctionBegin;
2130   if (str == SAME_NONZERO_PATTERN) {
2131     PetscScalar alpha = a;
2132     x    = (Mat_SeqAIJ*)xx->A->data;
2133     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2134     y    = (Mat_SeqAIJ*)yy->A->data;
2135     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2136     x    = (Mat_SeqAIJ*)xx->B->data;
2137     y    = (Mat_SeqAIJ*)yy->B->data;
2138     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2139     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2140   } else if (str == SUBSET_NONZERO_PATTERN) {
2141     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2142 
2143     x = (Mat_SeqAIJ*)xx->B->data;
2144     y = (Mat_SeqAIJ*)yy->B->data;
2145     if (y->xtoy && y->XtoY != xx->B) {
2146       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2147       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2148     }
2149     if (!y->xtoy) { /* get xtoy */
2150       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2151       y->XtoY = xx->B;
2152       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2153     }
2154     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2155   } else {
2156     Mat      B;
2157     PetscInt *nnz_d,*nnz_o;
2158     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2159     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2160     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2161     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2162     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2163     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2164     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2165     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2166     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2167     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2168     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2169     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2170     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2171     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2172   }
2173   PetscFunctionReturn(0);
2174 }
2175 
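/*
   A minimal usage sketch (illustrative only, not part of the library): the three MatStructure
   cases handled by MatAXPY_MPIAIJ() above; X and Y are placeholders with compatible sizes.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);       // BLAS axpy on both local parts
     ierr = MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);     // X's pattern contained in Y's
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);  // Y is reallocated with the merged pattern
*/
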
2176 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2177 
2178 #undef __FUNCT__
2179 #define __FUNCT__ "MatConjugate_MPIAIJ"
2180 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2181 {
2182 #if defined(PETSC_USE_COMPLEX)
2183   PetscErrorCode ierr;
2184   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2185 
2186   PetscFunctionBegin;
2187   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2188   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2189 #else
2190   PetscFunctionBegin;
2191 #endif
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatRealPart_MPIAIJ"
2197 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2198 {
2199   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2200   PetscErrorCode ierr;
2201 
2202   PetscFunctionBegin;
2203   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2204   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2205   PetscFunctionReturn(0);
2206 }
2207 
2208 #undef __FUNCT__
2209 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2210 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2211 {
2212   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2213   PetscErrorCode ierr;
2214 
2215   PetscFunctionBegin;
2216   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2217   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 #if defined(PETSC_HAVE_PBGL)
2222 
2223 #include <boost/parallel/mpi/bsp_process_group.hpp>
2224 #include <boost/graph/distributed/ilu_default_graph.hpp>
2225 #include <boost/graph/distributed/ilu_0_block.hpp>
2226 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2227 #include <boost/graph/distributed/petsc/interface.hpp>
2228 #include <boost/multi_array.hpp>
2229 #include <boost/parallel/distributed_property_map.hpp>
2230 
2231 #undef __FUNCT__
2232 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2233 /*
2234   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2235 */
2236 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2237 {
2238   namespace petsc = boost::distributed::petsc;
2239 
2240   namespace graph_dist = boost::graph::distributed;
2241   using boost::graph::distributed::ilu_default::process_group_type;
2242   using boost::graph::ilu_permuted;
2243 
2244   PetscBool      row_identity, col_identity;
2245   PetscContainer c;
2246   PetscInt       m, n, M, N;
2247   PetscErrorCode ierr;
2248 
2249   PetscFunctionBegin;
2250   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2251   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2252   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2253   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2254 
2255   process_group_type pg;
2256   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2257   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2258   lgraph_type& level_graph = *lgraph_p;
2259   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2260 
2261   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2262   ilu_permuted(level_graph);
2263 
2264   /* put together the new matrix */
2265   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2266   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2267   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2268   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2269   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2270   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2271   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2272   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2273 
2274   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2275   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2276   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2277   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 #undef __FUNCT__
2282 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2283 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2284 {
2285   PetscFunctionBegin;
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 #undef __FUNCT__
2290 #define __FUNCT__ "MatSolve_MPIAIJ"
2291 /*
2292   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2293 */
2294 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2295 {
2296   namespace graph_dist = boost::graph::distributed;
2297 
2298   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2299   lgraph_type    *lgraph_p;
2300   PetscContainer c;
2301   PetscErrorCode ierr;
2302 
2303   PetscFunctionBegin;
2304   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2305   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2306   ierr = VecCopy(b, x);CHKERRQ(ierr);
2307 
2308   PetscScalar *array_x;
2309   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2310   PetscInt sx;
2311   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2312 
2313   PetscScalar *array_b;
2314   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2315   PetscInt sb;
2316   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2317 
2318   lgraph_type& level_graph = *lgraph_p;
2319   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2320 
2321   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2322   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2323   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2324 
2325   typedef boost::iterator_property_map<array_ref_type::iterator,
2326                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2327   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2328   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2329 
2330   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2331   PetscFunctionReturn(0);
2332 }
2333 #endif
2334 
2335 #undef __FUNCT__
2336 #define __FUNCT__ "MatDestroy_MatRedundant"
2337 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2338 {
2339   PetscErrorCode ierr;
2340   Mat_Redundant  *redund;
2341   PetscInt       i;
2342   PetscMPIInt    size;
2343 
2344   PetscFunctionBegin;
2345   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2346   if (size == 1) {
2347     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2348     redund = a->redundant;
2349   } else {
2350     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2351     redund = a->redundant;
2352   }
2353   if (redund) {
2354     if (redund->matseq) { /* via MatGetSubMatrices()  */
2355       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2356       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2357       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2358       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2359     } else {
2360       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2361       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2362       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2363       for (i=0; i<redund->nrecvs; i++) {
2364         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2365         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2366       }
2367       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2368     }
2369 
2370     if (redund->psubcomm) {
2371       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2372     }
2373     ierr = redund->Destroy(A);CHKERRQ(ierr);
2374     ierr = PetscFree(redund);CHKERRQ(ierr);
2375   }
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 #undef __FUNCT__
2380 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2381 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2382 {
2383   PetscMPIInt    rank,size;
2384   MPI_Comm       comm;
2385   PetscErrorCode ierr;
2386   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2387   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2388   PetscInt       *rowrange = mat->rmap->range;
2389   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2390   Mat            A = aij->A,B=aij->B,C=*matredundant;
2391   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2392   PetscScalar    *sbuf_a;
2393   PetscInt       nzlocal=a->nz+b->nz;
2394   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2395   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2396   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2397   MatScalar      *aworkA,*aworkB;
2398   PetscScalar    *vals;
2399   PetscMPIInt    tag1,tag2,tag3,imdex;
2400   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2401   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2402   MPI_Status     recv_status,*send_status;
2403   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2404   PetscInt       **rbuf_j=NULL;
2405   PetscScalar    **rbuf_a=NULL;
2406   Mat_Redundant  *redund =NULL;
2407 
2408   PetscFunctionBegin;
2409   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2410   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2411   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2412   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2413   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2414 
2415   if (reuse == MAT_REUSE_MATRIX) {
2416     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2417     if (subsize == 1) {
2418       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2419       redund = c->redundant;
2420     } else {
2421       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2422       redund = c->redundant;
2423     }
2424     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2425 
2426     nsends    = redund->nsends;
2427     nrecvs    = redund->nrecvs;
2428     send_rank = redund->send_rank;
2429     recv_rank = redund->recv_rank;
2430     sbuf_nz   = redund->sbuf_nz;
2431     rbuf_nz   = redund->rbuf_nz;
2432     sbuf_j    = redund->sbuf_j;
2433     sbuf_a    = redund->sbuf_a;
2434     rbuf_j    = redund->rbuf_j;
2435     rbuf_a    = redund->rbuf_a;
2436   }
2437 
2438   if (reuse == MAT_INITIAL_MATRIX) {
2439     PetscInt    nleftover,np_subcomm;
2440 
2441     /* determine the destination processor ids (send_rank) and the counts nsends and nrecvs */
2442     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2443 
2444     np_subcomm = size/nsubcomm;
2445     nleftover  = size - nsubcomm*np_subcomm;
2446 
2447     /* the block of code below is specific to the INTERLACED ordering */
2448     /* ----------------------------------------------------------------*/
2449     nsends = 0; nrecvs = 0;
2450     for (i=0; i<size; i++) {
2451       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2452         send_rank[nsends++] = i;
2453         recv_rank[nrecvs++] = i;
2454       }
2455     }
2456     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2457       i = size-nleftover-1;
2458       j = 0;
2459       while (j < nsubcomm - nleftover) {
2460         send_rank[nsends++] = i;
2461         i--; j++;
2462       }
2463     }
2464 
2465     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2466       for (i=0; i<nleftover; i++) {
2467         recv_rank[nrecvs++] = size-nleftover+i;
2468       }
2469     }
2470     /*----------------------------------------------*/
2471 
2472     /* allocate sbuf_j, sbuf_a */
2473     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2474     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2475     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2476     /*
2477     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2478     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2479      */
2480   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2481 
2482   /* copy mat's local entries into the buffers */
2483   if (reuse == MAT_INITIAL_MATRIX) {
2484     rownz_max = 0;
2485     rptr      = sbuf_j;
2486     cols      = sbuf_j + rend-rstart + 1;
2487     vals      = sbuf_a;
2488     rptr[0]   = 0;
2489     for (i=0; i<rend-rstart; i++) {
2490       row    = i + rstart;
2491       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2492       ncols  = nzA + nzB;
2493       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2494       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2495       /* load the column indices for this row into cols */
2496       lwrite = 0;
2497       for (l=0; l<nzB; l++) {
2498         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2499           vals[lwrite]   = aworkB[l];
2500           cols[lwrite++] = ctmp;
2501         }
2502       }
2503       for (l=0; l<nzA; l++) {
2504         vals[lwrite]   = aworkA[l];
2505         cols[lwrite++] = cstart + cworkA[l];
2506       }
2507       for (l=0; l<nzB; l++) {
2508         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2509           vals[lwrite]   = aworkB[l];
2510           cols[lwrite++] = ctmp;
2511         }
2512       }
2513       vals     += ncols;
2514       cols     += ncols;
2515       rptr[i+1] = rptr[i] + ncols;
2516       if (rownz_max < ncols) rownz_max = ncols;
2517     }
2518     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2519   } else { /* only copy matrix values into sbuf_a */
2520     rptr    = sbuf_j;
2521     vals    = sbuf_a;
2522     rptr[0] = 0;
2523     for (i=0; i<rend-rstart; i++) {
2524       row    = i + rstart;
2525       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2526       ncols  = nzA + nzB;
2527       cworkB = b->j + b->i[i];
2528       aworkA = a->a + a->i[i];
2529       aworkB = b->a + b->i[i];
2530       lwrite = 0;
2531       for (l=0; l<nzB; l++) {
2532         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2533       }
2534       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2535       for (l=0; l<nzB; l++) {
2536         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2537       }
2538       vals     += ncols;
2539       rptr[i+1] = rptr[i] + ncols;
2540     }
2541   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2542 
2543   /* send nzlocal to others, and recv other's nzlocal */
2544   /*--------------------------------------------------*/
2545   if (reuse == MAT_INITIAL_MATRIX) {
2546     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2547 
2548     s_waits2 = s_waits3 + nsends;
2549     s_waits1 = s_waits2 + nsends;
2550     r_waits1 = s_waits1 + nsends;
2551     r_waits2 = r_waits1 + nrecvs;
2552     r_waits3 = r_waits2 + nrecvs;
2553   } else {
2554     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2555 
2556     r_waits3 = s_waits3 + nsends;
2557   }
2558 
2559   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2560   if (reuse == MAT_INITIAL_MATRIX) {
2561     /* get new tags to keep the communication clean */
2562     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2563     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2564     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2565 
2566     /* post receives of other's nzlocal */
2567     for (i=0; i<nrecvs; i++) {
2568       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2569     }
2570     /* send nzlocal to others */
2571     for (i=0; i<nsends; i++) {
2572       sbuf_nz[i] = nzlocal;
2573       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2574     }
2575     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2576     count = nrecvs;
2577     while (count) {
2578       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2579 
2580       recv_rank[imdex] = recv_status.MPI_SOURCE;
2581       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2582       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2583 
2584       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2585 
2586       rbuf_nz[imdex] += i + 2;
2587 
2588       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2589       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2590       count--;
2591     }
2592     /* wait on sends of nzlocal */
2593     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2594     /* send mat->i,j to other processes, and receive theirs */
2595     /*------------------------------------------------*/
2596     for (i=0; i<nsends; i++) {
2597       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2598       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2599     }
2600     /* wait on receives of mat->i,j */
2601     /*------------------------------*/
2602     count = nrecvs;
2603     while (count) {
2604       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2605       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2606       count--;
2607     }
2608     /* wait on sends of mat->i,j */
2609     /*---------------------------*/
2610     if (nsends) {
2611       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2612     }
2613   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2614 
2615   /* post receives, send and receive mat->a */
2616   /*----------------------------------------*/
2617   for (imdex=0; imdex<nrecvs; imdex++) {
2618     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2619   }
2620   for (i=0; i<nsends; i++) {
2621     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2622   }
2623   count = nrecvs;
2624   while (count) {
2625     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2626     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2627     count--;
2628   }
2629   if (nsends) {
2630     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2631   }
2632 
2633   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2634 
2635   /* create redundant matrix */
2636   /*-------------------------*/
2637   if (reuse == MAT_INITIAL_MATRIX) {
2638     const PetscInt *range;
2639     PetscInt       rstart_sub,rend_sub,mloc_sub;
2640 
2641     /* compute rownz_max for preallocation */
2642     for (imdex=0; imdex<nrecvs; imdex++) {
2643       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2644       rptr = rbuf_j[imdex];
2645       for (i=0; i<j; i++) {
2646         ncols = rptr[i+1] - rptr[i];
2647         if (rownz_max < ncols) rownz_max = ncols;
2648       }
2649     }
2650 
2651     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2652 
2653     /* get local size of redundant matrix
2654        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2655     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2656     rstart_sub = range[nsubcomm*subrank];
2657     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2658       rend_sub = range[nsubcomm*(subrank+1)];
2659     } else {
2660       rend_sub = mat->rmap->N;
2661     }
2662     mloc_sub = rend_sub - rstart_sub;
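    /* e.g. size=4, nsubcomm=2: subrank 0 owns range[0]..range[2] (the rows of global ranks 0 and 1), subrank 1 owns range[2]..N (the rows of global ranks 2 and 3) */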
2663 
2664     if (M == N) {
2665       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2666     } else { /* non-square matrix */
2667       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2668     }
2669     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2670     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2671     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2672     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2673   } else {
2674     C = *matredundant;
2675   }
2676 
2677   /* insert local matrix entries */
2678   rptr = sbuf_j;
2679   cols = sbuf_j + rend-rstart + 1;
2680   vals = sbuf_a;
2681   for (i=0; i<rend-rstart; i++) {
2682     row   = i + rstart;
2683     ncols = rptr[i+1] - rptr[i];
2684     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2685     vals += ncols;
2686     cols += ncols;
2687   }
2688   /* insert received matrix entries */
2689   for (imdex=0; imdex<nrecvs; imdex++) {
2690     rstart = rowrange[recv_rank[imdex]];
2691     rend   = rowrange[recv_rank[imdex]+1];
2692     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2693     rptr   = rbuf_j[imdex];
2694     cols   = rbuf_j[imdex] + rend-rstart + 1;
2695     vals   = rbuf_a[imdex];
2696     for (i=0; i<rend-rstart; i++) {
2697       row   = i + rstart;
2698       ncols = rptr[i+1] - rptr[i];
2699       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2700       vals += ncols;
2701       cols += ncols;
2702     }
2703   }
2704   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2705   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2706 
2707   if (reuse == MAT_INITIAL_MATRIX) {
2708     *matredundant = C;
2709 
2710     /* create a supporting struct and attach it to C for reuse */
2711     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2712     if (subsize == 1) {
2713       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2714       c->redundant = redund;
2715     } else {
2716       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2717       c->redundant = redund;
2718     }
2719 
2720     redund->nzlocal   = nzlocal;
2721     redund->nsends    = nsends;
2722     redund->nrecvs    = nrecvs;
2723     redund->send_rank = send_rank;
2724     redund->recv_rank = recv_rank;
2725     redund->sbuf_nz   = sbuf_nz;
2726     redund->rbuf_nz   = rbuf_nz;
2727     redund->sbuf_j    = sbuf_j;
2728     redund->sbuf_a    = sbuf_a;
2729     redund->rbuf_j    = rbuf_j;
2730     redund->rbuf_a    = rbuf_a;
2731     redund->psubcomm  = NULL;
2732 
2733     redund->Destroy = C->ops->destroy;
2734     C->ops->destroy = MatDestroy_MatRedundant;
2735   }
2736   PetscFunctionReturn(0);
2737 }
2738 
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2741 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2742 {
2743   PetscErrorCode ierr;
2744   MPI_Comm       comm;
2745   PetscMPIInt    size,subsize;
2746   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2747   Mat_Redundant  *redund=NULL;
2748   PetscSubcomm   psubcomm=NULL;
2749   MPI_Comm       subcomm_in=subcomm;
2750   Mat            *matseq;
2751   IS             isrow,iscol;
2752 
2753   PetscFunctionBegin;
2754   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2755     if (reuse ==  MAT_INITIAL_MATRIX) {
2756       /* create psubcomm, then get subcomm */
2757       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2758       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2759       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2760 
2761       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2762       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2763       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2764       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2765       subcomm = psubcomm->comm;
2766     } else { /* retrieve psubcomm and subcomm */
2767       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2768       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2769       if (subsize == 1) {
2770         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2771         redund = c->redundant;
2772       } else {
2773         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2774         redund = c->redundant;
2775       }
2776       psubcomm = redund->psubcomm;
2777     }
2778     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2779       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2780       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2781         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2782         if (subsize == 1) {
2783           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2784           c->redundant->psubcomm = psubcomm;
2785         } else {
2786           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2787           c->redundant->psubcomm = psubcomm;
2788         }
2789       }
2790       PetscFunctionReturn(0);
2791     }
2792   }
2793 
2794   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2795   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2796   if (reuse == MAT_INITIAL_MATRIX) {
2797     /* create a local sequential matrix matseq[0] */
2798     mloc_sub = PETSC_DECIDE;
2799     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2800     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2801     rstart = rend - mloc_sub;
2802     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2803     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2804   } else { /* reuse == MAT_REUSE_MATRIX */
2805     if (subsize == 1) {
2806       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2807       redund = c->redundant;
2808     } else {
2809       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2810       redund = c->redundant;
2811     }
2812 
2813     isrow  = redund->isrow;
2814     iscol  = redund->iscol;
2815     matseq = redund->matseq;
2816   }
2817   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2818   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2819 
2820   if (reuse == MAT_INITIAL_MATRIX) {
2821     /* create a supporting struct and attach it to C for reuse */
2822     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2823     if (subsize == 1) {
2824       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2825       c->redundant = redund;
2826     } else {
2827       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2828       c->redundant = redund;
2829     }
2830     redund->isrow    = isrow;
2831     redund->iscol    = iscol;
2832     redund->matseq   = matseq;
2833     redund->psubcomm = psubcomm;
2834     redund->Destroy               = (*matredundant)->ops->destroy;
2835     (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2836   }
2837   PetscFunctionReturn(0);
2838 }
2839 
2840 #undef __FUNCT__
2841 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2842 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2843 {
2844   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2845   PetscErrorCode ierr;
2846   PetscInt       i,*idxb = 0;
2847   PetscScalar    *va,*vb;
2848   Vec            vtmp;
2849 
2850   PetscFunctionBegin;
2851   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2852   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2853   if (idx) {
2854     for (i=0; i<A->rmap->n; i++) {
2855       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2856     }
2857   }
2858 
2859   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2860   if (idx) {
2861     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2862   }
2863   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2864   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2865 
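  /* merge: keep the off-diagonal candidate when it dominates in magnitude, mapping its local column index back to a global index through garray */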
2866   for (i=0; i<A->rmap->n; i++) {
2867     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2868       va[i] = vb[i];
2869       if (idx) idx[i] = a->garray[idxb[i]];
2870     }
2871   }
2872 
2873   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2874   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2875   ierr = PetscFree(idxb);CHKERRQ(ierr);
2876   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2877   PetscFunctionReturn(0);
2878 }
2879 
2880 #undef __FUNCT__
2881 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2882 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2883 {
2884   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2885   PetscErrorCode ierr;
2886   PetscInt       i,*idxb = 0;
2887   PetscScalar    *va,*vb;
2888   Vec            vtmp;
2889 
2890   PetscFunctionBegin;
2891   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2892   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2893   if (idx) {
2894     for (i=0; i<A->rmap->n; i++) {
2895       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2896     }
2897   }
2898 
2899   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2900   if (idx) {
2901     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2902   }
2903   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2904   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2905 
2906   for (i=0; i<A->rmap->n; i++) {
2907     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2908       va[i] = vb[i];
2909       if (idx) idx[i] = a->garray[idxb[i]];
2910     }
2911   }
2912 
2913   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2914   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2915   ierr = PetscFree(idxb);CHKERRQ(ierr);
2916   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2917   PetscFunctionReturn(0);
2918 }
2919 
2920 #undef __FUNCT__
2921 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2922 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2923 {
2924   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2925   PetscInt       n      = A->rmap->n;
2926   PetscInt       cstart = A->cmap->rstart;
2927   PetscInt       *cmap  = mat->garray;
2928   PetscInt       *diagIdx, *offdiagIdx;
2929   Vec            diagV, offdiagV;
2930   PetscScalar    *a, *diagA, *offdiagA;
2931   PetscInt       r;
2932   PetscErrorCode ierr;
2933 
2934   PetscFunctionBegin;
2935   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2936   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2937   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2938   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2939   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2940   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2941   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2942   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2943   for (r = 0; r < n; ++r) {
2944     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2945       a[r]   = diagA[r];
2946       idx[r] = cstart + diagIdx[r];
2947     } else {
2948       a[r]   = offdiagA[r];
2949       idx[r] = cmap[offdiagIdx[r]];
2950     }
2951   }
2952   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2953   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2954   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2955   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2956   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2957   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2958   PetscFunctionReturn(0);
2959 }
2960 
2961 #undef __FUNCT__
2962 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2963 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2964 {
2965   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2966   PetscInt       n      = A->rmap->n;
2967   PetscInt       cstart = A->cmap->rstart;
2968   PetscInt       *cmap  = mat->garray;
2969   PetscInt       *diagIdx, *offdiagIdx;
2970   Vec            diagV, offdiagV;
2971   PetscScalar    *a, *diagA, *offdiagA;
2972   PetscInt       r;
2973   PetscErrorCode ierr;
2974 
2975   PetscFunctionBegin;
2976   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2977   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2978   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2979   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2980   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2981   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2982   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2983   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2984   for (r = 0; r < n; ++r) {
2985     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2986       a[r]   = diagA[r];
2987       idx[r] = cstart + diagIdx[r];
2988     } else {
2989       a[r]   = offdiagA[r];
2990       idx[r] = cmap[offdiagIdx[r]];
2991     }
2992   }
2993   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2994   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2995   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2996   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2997   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2998   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2999   PetscFunctionReturn(0);
3000 }
3001 
3002 #undef __FUNCT__
3003 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3004 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3005 {
3006   PetscErrorCode ierr;
3007   Mat            *dummy;
3008 
3009   PetscFunctionBegin;
3010   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3011   *newmat = *dummy;
3012   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3013   PetscFunctionReturn(0);
3014 }
3015 
3016 #undef __FUNCT__
3017 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3018 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3019 {
3020   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3021   PetscErrorCode ierr;
3022 
3023   PetscFunctionBegin;
3024   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3025   PetscFunctionReturn(0);
3026 }
3027 
3028 #undef __FUNCT__
3029 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3030 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3031 {
3032   PetscErrorCode ierr;
3033   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3034 
3035   PetscFunctionBegin;
3036   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3037   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3038   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3039   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3040   PetscFunctionReturn(0);
3041 }
3042 
3043 /* -------------------------------------------------------------------*/
3044 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3045                                        MatGetRow_MPIAIJ,
3046                                        MatRestoreRow_MPIAIJ,
3047                                        MatMult_MPIAIJ,
3048                                 /* 4*/ MatMultAdd_MPIAIJ,
3049                                        MatMultTranspose_MPIAIJ,
3050                                        MatMultTransposeAdd_MPIAIJ,
3051 #if defined(PETSC_HAVE_PBGL)
3052                                        MatSolve_MPIAIJ,
3053 #else
3054                                        0,
3055 #endif
3056                                        0,
3057                                        0,
3058                                 /*10*/ 0,
3059                                        0,
3060                                        0,
3061                                        MatSOR_MPIAIJ,
3062                                        MatTranspose_MPIAIJ,
3063                                 /*15*/ MatGetInfo_MPIAIJ,
3064                                        MatEqual_MPIAIJ,
3065                                        MatGetDiagonal_MPIAIJ,
3066                                        MatDiagonalScale_MPIAIJ,
3067                                        MatNorm_MPIAIJ,
3068                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3069                                        MatAssemblyEnd_MPIAIJ,
3070                                        MatSetOption_MPIAIJ,
3071                                        MatZeroEntries_MPIAIJ,
3072                                 /*24*/ MatZeroRows_MPIAIJ,
3073                                        0,
3074 #if defined(PETSC_HAVE_PBGL)
3075                                        0,
3076 #else
3077                                        0,
3078 #endif
3079                                        0,
3080                                        0,
3081                                 /*29*/ MatSetUp_MPIAIJ,
3082 #if defined(PETSC_HAVE_PBGL)
3083                                        0,
3084 #else
3085                                        0,
3086 #endif
3087                                        0,
3088                                        0,
3089                                        0,
3090                                 /*34*/ MatDuplicate_MPIAIJ,
3091                                        0,
3092                                        0,
3093                                        0,
3094                                        0,
3095                                 /*39*/ MatAXPY_MPIAIJ,
3096                                        MatGetSubMatrices_MPIAIJ,
3097                                        MatIncreaseOverlap_MPIAIJ,
3098                                        MatGetValues_MPIAIJ,
3099                                        MatCopy_MPIAIJ,
3100                                 /*44*/ MatGetRowMax_MPIAIJ,
3101                                        MatScale_MPIAIJ,
3102                                        0,
3103                                        0,
3104                                        MatZeroRowsColumns_MPIAIJ,
3105                                 /*49*/ MatSetRandom_MPIAIJ,
3106                                        0,
3107                                        0,
3108                                        0,
3109                                        0,
3110                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3111                                        0,
3112                                        MatSetUnfactored_MPIAIJ,
3113                                        MatPermute_MPIAIJ,
3114                                        0,
3115                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3116                                        MatDestroy_MPIAIJ,
3117                                        MatView_MPIAIJ,
3118                                        0,
3119                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3120                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3121                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3122                                        0,
3123                                        0,
3124                                        0,
3125                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3126                                        MatGetRowMinAbs_MPIAIJ,
3127                                        0,
3128                                        MatSetColoring_MPIAIJ,
3129                                        0,
3130                                        MatSetValuesAdifor_MPIAIJ,
3131                                 /*75*/ MatFDColoringApply_AIJ,
3132                                        0,
3133                                        0,
3134                                        0,
3135                                        MatFindZeroDiagonals_MPIAIJ,
3136                                 /*80*/ 0,
3137                                        0,
3138                                        0,
3139                                 /*83*/ MatLoad_MPIAIJ,
3140                                        0,
3141                                        0,
3142                                        0,
3143                                        0,
3144                                        0,
3145                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3146                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3147                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3148                                        MatPtAP_MPIAIJ_MPIAIJ,
3149                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3150                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3151                                        0,
3152                                        0,
3153                                        0,
3154                                        0,
3155                                 /*99*/ 0,
3156                                        0,
3157                                        0,
3158                                        MatConjugate_MPIAIJ,
3159                                        0,
3160                                 /*104*/MatSetValuesRow_MPIAIJ,
3161                                        MatRealPart_MPIAIJ,
3162                                        MatImaginaryPart_MPIAIJ,
3163                                        0,
3164                                        0,
3165                                 /*109*/0,
3166                                        MatGetRedundantMatrix_MPIAIJ,
3167                                        MatGetRowMin_MPIAIJ,
3168                                        0,
3169                                        0,
3170                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3171                                        0,
3172                                        0,
3173                                        0,
3174                                        0,
3175                                 /*119*/0,
3176                                        0,
3177                                        0,
3178                                        0,
3179                                        MatGetMultiProcBlock_MPIAIJ,
3180                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3181                                        MatGetColumnNorms_MPIAIJ,
3182                                        MatInvertBlockDiagonal_MPIAIJ,
3183                                        0,
3184                                        MatGetSubMatricesParallel_MPIAIJ,
3185                                 /*129*/0,
3186                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3187                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3188                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3189                                        0,
3190                                 /*134*/0,
3191                                        0,
3192                                        0,
3193                                        0,
3194                                        0,
3195                                 /*139*/0,
3196                                        0,
3197                                        0,
3198                                        MatFDColoringSetUp_MPIXAIJ
3199 };
3200 
3201 /* ----------------------------------------------------------------------------------------*/
3202 
3203 #undef __FUNCT__
3204 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3205 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3206 {
3207   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3208   PetscErrorCode ierr;
3209 
3210   PetscFunctionBegin;
3211   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3212   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3213   PetscFunctionReturn(0);
3214 }
3215 
3216 #undef __FUNCT__
3217 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3218 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3219 {
3220   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3221   PetscErrorCode ierr;
3222 
3223   PetscFunctionBegin;
3224   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3225   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3226   PetscFunctionReturn(0);
3227 }
3228 
3229 #undef __FUNCT__
3230 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3231 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3232 {
3233   Mat_MPIAIJ     *b;
3234   PetscErrorCode ierr;
3235 
3236   PetscFunctionBegin;
3237   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3238   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3239   b = (Mat_MPIAIJ*)B->data;
3240 
3241   if (!B->preallocated) {
3242     /* Explicitly create 2 MATSEQAIJ matrices. */
3243     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3244     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3245     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3246     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3247     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3248     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3249     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3250     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3251     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3252     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3253   }
3254 
3255   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3256   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3257   B->preallocated = PETSC_TRUE;
3258   PetscFunctionReturn(0);
3259 }
3260 
3261 #undef __FUNCT__
3262 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3263 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3264 {
3265   Mat            mat;
3266   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3267   PetscErrorCode ierr;
3268 
3269   PetscFunctionBegin;
3270   *newmat = 0;
3271   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3272   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3273   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3274   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3275   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3276   a       = (Mat_MPIAIJ*)mat->data;
3277 
3278   mat->factortype   = matin->factortype;
3279   mat->rmap->bs     = matin->rmap->bs;
3280   mat->cmap->bs     = matin->cmap->bs;
3281   mat->assembled    = PETSC_TRUE;
3282   mat->insertmode   = NOT_SET_VALUES;
3283   mat->preallocated = PETSC_TRUE;
3284 
3285   a->size         = oldmat->size;
3286   a->rank         = oldmat->rank;
3287   a->donotstash   = oldmat->donotstash;
3288   a->roworiented  = oldmat->roworiented;
3289   a->rowindices   = 0;
3290   a->rowvalues    = 0;
3291   a->getrowactive = PETSC_FALSE;
3292 
3293   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3294   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3295 
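  /* colmap maps a global column number to the corresponding local column of the off-diagonal block; garray is the reverse map (local off-diagonal column -> global column) */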
3296   if (oldmat->colmap) {
3297 #if defined(PETSC_USE_CTABLE)
3298     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3299 #else
3300     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3301     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3302     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3303 #endif
3304   } else a->colmap = 0;
3305   if (oldmat->garray) {
3306     PetscInt len;
3307     len  = oldmat->B->cmap->n;
3308     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3309     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3310     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3311   } else a->garray = 0;
3312 
3313   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3314   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3315   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3316   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3317   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3318   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3319   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3320   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3321   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3322   *newmat = mat;
3323   PetscFunctionReturn(0);
3324 }
3325 
3326 
3327 
3328 #undef __FUNCT__
3329 #define __FUNCT__ "MatLoad_MPIAIJ"
3330 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3331 {
3332   PetscScalar    *vals,*svals;
3333   MPI_Comm       comm;
3334   PetscErrorCode ierr;
3335   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3336   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3337   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3338   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3339   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3340   int            fd;
3341   PetscInt       bs = 1;
3342 
3343   PetscFunctionBegin;
3344   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3345   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3346   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3347   if (!rank) {
3348     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3349     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
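    /* binary header layout: [MAT_FILE_CLASSID, global rows M, global cols N, total number of nonzeros] */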
3350     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3351   }
3352 
3353   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3354   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3355   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3356 
3357   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3358 
3359   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3360   M    = header[1]; N = header[2];
3361   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3362   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3363   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3364 
3365   /* If global sizes are set, check if they are consistent with that given in the file */
3366   if (sizesset) {
3367     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3368   }
3369   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3370   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3371 
3372   /* determine ownership of all (block) rows */
3373   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3374   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3375   else m = newMat->rmap->n; /* Set by user */
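  /* e.g. M=10, bs=1, size=4: ranks 0 and 1 get 3 rows each, ranks 2 and 3 get 2 rows each */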
3376 
3377   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3378   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3379 
3380   /* First process needs enough room for process with most rows */
3381   if (!rank) {
3382     mmax = rowners[1];
3383     for (i=2; i<=size; i++) {
3384       mmax = PetscMax(mmax, rowners[i]);
3385     }
3386   } else mmax = -1;             /* unused, but compilers complain */
3387 
3388   rowners[0] = 0;
3389   for (i=2; i<=size; i++) {
3390     rowners[i] += rowners[i-1];
3391   }
3392   rstart = rowners[rank];
3393   rend   = rowners[rank+1];
3394 
3395   /* distribute row lengths to all processors */
3396   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3397   if (!rank) {
3398     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3399     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3400     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3401     for (j=0; j<m; j++) {
3402       procsnz[0] += ourlens[j];
3403     }
3404     for (i=1; i<size; i++) {
3405       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3406       /* calculate the number of nonzeros on each processor */
3407       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3408         procsnz[i] += rowlengths[j];
3409       }
3410       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3411     }
3412     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3413   } else {
3414     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3415   }
3416 
3417   if (!rank) {
3418     /* determine max buffer needed and allocate it */
3419     maxnz = 0;
3420     for (i=0; i<size; i++) {
3421       maxnz = PetscMax(maxnz,procsnz[i]);
3422     }
3423     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3424 
3425     /* read in my part of the matrix column indices  */
3426     nz   = procsnz[0];
3427     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3428     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3429 
3430     /* read in everyone else's part and ship it off */
3431     for (i=1; i<size; i++) {
3432       nz   = procsnz[i];
3433       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3434       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3435     }
3436     ierr = PetscFree(cols);CHKERRQ(ierr);
3437   } else {
3438     /* determine buffer space needed for message */
3439     nz = 0;
3440     for (i=0; i<m; i++) {
3441       nz += ourlens[i];
3442     }
3443     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3444 
3445     /* receive message of column indices*/
3446     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3447   }
3448 
3449   /* determine column ownership if matrix is not square */
3450   if (N != M) {
3451     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3452     else n = newMat->cmap->n;
3453     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3454     cstart = cend - n;
3455   } else {
3456     cstart = rstart;
3457     cend   = rend;
3458     n      = cend - cstart;
3459   }
3460 
3461   /* loop over local rows, determining number of off diagonal entries */
3462   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3463   jj   = 0;
3464   for (i=0; i<m; i++) {
3465     for (j=0; j<ourlens[i]; j++) {
3466       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3467       jj++;
3468     }
3469   }
3470 
3471   for (i=0; i<m; i++) {
3472     ourlens[i] -= offlens[i];
3473   }
3474   if (!sizesset) {
3475     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3476   }
3477 
3478   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3479 
3480   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3481 
3482   for (i=0; i<m; i++) {
3483     ourlens[i] += offlens[i];
3484   }
3485 
3486   if (!rank) {
3487     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3488 
3489     /* read in my part of the matrix numerical values  */
3490     nz   = procsnz[0];
3491     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3492 
3493     /* insert into matrix */
3494     jj      = rstart;
3495     smycols = mycols;
3496     svals   = vals;
3497     for (i=0; i<m; i++) {
3498       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3499       smycols += ourlens[i];
3500       svals   += ourlens[i];
3501       jj++;
3502     }
3503 
3504     /* read in other processors and ship out */
3505     for (i=1; i<size; i++) {
3506       nz   = procsnz[i];
3507       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3508       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3509     }
3510     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3511   } else {
3512     /* receive numeric values */
3513     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3514 
3515     /* receive message of values*/
3516     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3517 
3518     /* insert into matrix */
3519     jj      = rstart;
3520     smycols = mycols;
3521     svals   = vals;
3522     for (i=0; i<m; i++) {
3523       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3524       smycols += ourlens[i];
3525       svals   += ourlens[i];
3526       jj++;
3527     }
3528   }
3529   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3530   ierr = PetscFree(vals);CHKERRQ(ierr);
3531   ierr = PetscFree(mycols);CHKERRQ(ierr);
3532   ierr = PetscFree(rowners);CHKERRQ(ierr);
3533   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3534   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3535   PetscFunctionReturn(0);
3536 }
3537 
3538 #undef __FUNCT__
3539 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3540 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3541 {
3542   PetscErrorCode ierr;
3543   IS             iscol_local;
3544   PetscInt       csize;
3545 
3546   PetscFunctionBegin;
3547   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3548   if (call == MAT_REUSE_MATRIX) {
3549     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3550     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3551   } else {
3552     PetscInt cbs;
3553     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3554     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3555     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3556   }
3557   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3558   if (call == MAT_INITIAL_MATRIX) {
3559     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3560     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3561   }
3562   PetscFunctionReturn(0);
3563 }
3564 
3565 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3566 #undef __FUNCT__
3567 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3568 /*
3569     Not great since it makes two copies of the submatrix: first a local SeqAIJ,
3570   and then the end result obtained by concatenating the local matrices.
3571   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3572 
3573   Note: This requires a sequential iscol with all indices.
3574 */
3575 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3576 {
3577   PetscErrorCode ierr;
3578   PetscMPIInt    rank,size;
3579   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3580   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3581   PetscBool      allcolumns, colflag;
3582   Mat            M,Mreuse;
3583   MatScalar      *vwork,*aa;
3584   MPI_Comm       comm;
3585   Mat_SeqAIJ     *aij;
3586 
3587   PetscFunctionBegin;
3588   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3589   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3590   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3591 
3592   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3593   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3594   if (colflag && ncol == mat->cmap->N) {
3595     allcolumns = PETSC_TRUE;
3596   } else {
3597     allcolumns = PETSC_FALSE;
3598   }
3599   if (call ==  MAT_REUSE_MATRIX) {
3600     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3601     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3602     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3603   } else {
3604     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3605   }
3606 
3607   /*
3608       m - number of local rows
3609       n - number of columns (same on all processors)
3610       rstart - first row in new global matrix generated
3611   */
3612   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3613   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3614   if (call == MAT_INITIAL_MATRIX) {
3615     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3616     ii  = aij->i;
3617     jj  = aij->j;
3618 
3619     /*
3620         Determine the number of non-zeros in the diagonal and off-diagonal
3621         portions of the matrix in order to do correct preallocation
3622     */
3623 
3624     /* first get start and end of "diagonal" columns */
3625     if (csize == PETSC_DECIDE) {
3626       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3627       if (mglobal == n) { /* square matrix */
3628         nlocal = m;
3629       } else {
3630         nlocal = n/size + ((n % size) > rank);
3631       }
3632     } else {
3633       nlocal = csize;
3634     }
3635     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3636     rstart = rend - nlocal;
3637     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3638 
3639     /* next, compute all the lengths */
3640     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3641     olens = dlens + m;
3642     for (i=0; i<m; i++) {
3643       jend = ii[i+1] - ii[i];
3644       olen = 0;
3645       dlen = 0;
3646       for (j=0; j<jend; j++) {
3647         if (*jj < rstart || *jj >= rend) olen++;
3648         else dlen++;
3649         jj++;
3650       }
3651       olens[i] = olen;
3652       dlens[i] = dlen;
3653     }
3654     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3655     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3656     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3657     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3658     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3659     ierr = PetscFree(dlens);CHKERRQ(ierr);
3660   } else {
3661     PetscInt ml,nl;
3662 
3663     M    = *newmat;
3664     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3665     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3666     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3667     /*
3668          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3669        rather than the slower MatSetValues().
3670     */
3671     M->was_assembled = PETSC_TRUE;
3672     M->assembled     = PETSC_FALSE;
3673   }
3674   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3675   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3676   ii   = aij->i;
3677   jj   = aij->j;
3678   aa   = aij->a;
3679   for (i=0; i<m; i++) {
3680     row   = rstart + i;
3681     nz    = ii[i+1] - ii[i];
3682     cwork = jj;     jj += nz;
3683     vwork = aa;     aa += nz;
3684     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3685   }
3686 
3687   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3688   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3689   *newmat = M;
3690 
3691   /* save submatrix used in processor for next request */
3692   if (call ==  MAT_INITIAL_MATRIX) {
3693     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3694     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3695   }
3696   PetscFunctionReturn(0);
3697 }
3698 
3699 #undef __FUNCT__
3700 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3701 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3702 {
3703   PetscInt       m,cstart, cend,j,nnz,i,d;
3704   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3705   const PetscInt *JJ;
3706   PetscScalar    *values;
3707   PetscErrorCode ierr;
3708 
3709   PetscFunctionBegin;
3710   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3711 
3712   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3713   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3714   m      = B->rmap->n;
3715   cstart = B->cmap->rstart;
3716   cend   = B->cmap->rend;
3717   rstart = B->rmap->rstart;
3718 
3719   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3720 
3721 #if defined(PETSC_USE_DEBUG)
3722   for (i=0; i<m; i++) {
3723     nnz = Ii[i+1]- Ii[i];
3724     JJ  = J + Ii[i];
3725     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3726     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3727     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3728   }
3729 #endif
3730 
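  /* count, per local row, the entries in the diagonal block [cstart,cend) (d_nnz) and the entries outside it (o_nnz), to drive preallocation */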
3731   for (i=0; i<m; i++) {
3732     nnz     = Ii[i+1]- Ii[i];
3733     JJ      = J + Ii[i];
3734     nnz_max = PetscMax(nnz_max,nnz);
3735     d       = 0;
3736     for (j=0; j<nnz; j++) {
3737       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3738     }
3739     d_nnz[i] = d;
3740     o_nnz[i] = nnz - d;
3741   }
3742   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3743   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3744 
3745   if (v) values = (PetscScalar*)v;
3746   else {
3747     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3748   }
3749 
3750   for (i=0; i<m; i++) {
3751     ii   = i + rstart;
3752     nnz  = Ii[i+1]- Ii[i];
3753     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3754   }
3755   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3756   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3757 
3758   if (!v) {
3759     ierr = PetscFree(values);CHKERRQ(ierr);
3760   }
3761   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3762   PetscFunctionReturn(0);
3763 }
3764 
3765 #undef __FUNCT__
3766 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3767 /*@
3768    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3769    (the default parallel PETSc format).
3770 
3771    Collective on MPI_Comm
3772 
3773    Input Parameters:
3774 +  B - the matrix
3775 .  i - the indices into j for the start of each local row (starts with zero)
3776 .  j - the column indices for each local row (starts with zero)
3777 -  v - optional values in the matrix
3778 
3779    Level: developer
3780 
3781    Notes:
3782       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3783     thus you CANNOT change the matrix entries by changing the values of v[] after you have
3784     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3785 
3786       The i and j indices are 0 based; the i indices are offsets into the local j array.
3787 
3788        The format which is used for the sparse matrix input is equivalent to a
3789     row-major ordering, i.e., for the following matrix, the input data expected is
3790     as shown:
3791 
3792         1 0 0
3793         2 0 3     P0
3794        -------
3795         4 5 6     P1
3796 
3797      Process0 [P0]: rows_owned=[0,1]
3798         i =  {0,1,3}  [size = nrow+1  = 2+1]
3799         j =  {0,0,2}  [size = nz = 3]
3800         v =  {1,2,3}  [size = nz = 3]
3801 
3802      Process1 [P1]: rows_owned=[2]
3803         i =  {0,3}    [size = nrow+1  = 1+1]
3804         j =  {0,1,2}  [size = nz = 3]
3805         v =  {4,5,6}  [size = nz = 3]
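
     For illustration only, a possible calling sequence on process 0 [P0] above,
     where B, i, j, v, and ierr are hypothetical local names and process 1 would
     pass its own arrays:

.vb
     Mat            B;
     PetscErrorCode ierr;
     PetscInt       i[] = {0,1,3};
     PetscInt       j[] = {0,0,2};
     PetscScalar    v[] = {1,2,3};

     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve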
3806 
3807 .keywords: matrix, aij, compressed row, sparse, parallel
3808 
3809 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3810           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3811 @*/
3812 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3813 {
3814   PetscErrorCode ierr;
3815 
3816   PetscFunctionBegin;
3817   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3818   PetscFunctionReturn(0);
3819 }
3820 
3821 #undef __FUNCT__
3822 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3823 /*@C
3824    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3825    (the default parallel PETSc format).  For good matrix assembly performance
3826    the user should preallocate the matrix storage by setting the parameters
3827    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3828    performance can be increased by more than a factor of 50.
3829 
3830    Collective on MPI_Comm
3831 
3832    Input Parameters:
3833 +  A - the matrix
3834 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3835            (same value is used for all local rows)
3836 .  d_nnz - array containing the number of nonzeros in the various rows of the
3837            DIAGONAL portion of the local submatrix (possibly different for each row)
3838            or NULL, if d_nz is used to specify the nonzero structure.
3839            The size of this array is equal to the number of local rows, i.e 'm'.
3840            For matrices that will be factored, you must leave room for (and set)
3841            the diagonal entry even if it is zero.
3842 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3843            submatrix (same value is used for all local rows).
3844 -  o_nnz - array containing the number of nonzeros in the various rows of the
3845            OFF-DIAGONAL portion of the local submatrix (possibly different for
3846            each row) or NULL, if o_nz is used to specify the nonzero
3847            structure. The size of this array is equal to the number
3848            of local rows, i.e 'm'.
3849 
3850    If the *_nnz parameter is given then the *_nz parameter is ignored
3851 
3852    The AIJ format (also called the Yale sparse matrix format or
3853    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3854    storage.  The stored row and column indices begin with zero.
3855    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3856 
3857    The parallel matrix is partitioned such that the first m0 rows belong to
3858    process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
3860 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an m x n matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3870 
   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3872 
   You can call MatGetInfo() to get information on how effective the preallocation was,
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.
3877 
3878    Example usage:
3879 
3880    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3883    as follows:
3884 
3885 .vb
3886             1  2  0  |  0  3  0  |  0  4
3887     Proc0   0  5  6  |  7  0  0  |  8  0
3888             9  0 10  | 11  0  0  | 12  0
3889     -------------------------------------
3890            13  0 14  | 15 16 17  |  0  0
3891     Proc1   0 18  0  | 19 20 21  |  0  0
3892             0  0  0  | 22 23  0  | 24  0
3893     -------------------------------------
3894     Proc2  25 26 27  |  0  0 28  | 29  0
3895            30  0  0  | 31 32 33  |  0 34
3896 .ve
3897 
3898    This can be represented as a collection of submatrices as:
3899 
3900 .vb
3901       A B C
3902       D E F
3903       G H I
3904 .ve
3905 
3906    Where the submatrices A,B,C are owned by proc0, D,E,F are
3907    owned by proc1, G,H,I are owned by proc2.
3908 
3909    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3910    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3911    The 'M','N' parameters are 8,8, and have the same values on all procs.
3912 
3913    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3914    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3915    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; for example, proc1 will store [E] as a SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.
3919 
   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices respectively.
3925    In this case, the values of d_nz,o_nz are:
3926 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
3930 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.
3935 
   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
3939 .vb
3940      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3941      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3942      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3943 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
3946 
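   A minimal sketch for proc1 of the example above (comm is an illustrative
   communicator name, not part of this routine):
.vb
   Mat      A;
   PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
   MatCreate(comm,&A);
   MatSetSizes(A,3,3,8,8);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
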
3947    Level: intermediate
3948 
3949 .keywords: matrix, aij, compressed row, sparse, parallel
3950 
3951 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3952           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3953 @*/
3954 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3955 {
3956   PetscErrorCode ierr;
3957 
3958   PetscFunctionBegin;
3959   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3960   PetscValidType(B,1);
3961   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3962   PetscFunctionReturn(0);
3963 }
3964 
3965 #undef __FUNCT__
3966 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3967 /*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
         in standard CSR format.
3970 
3971    Collective on MPI_Comm
3972 
3973    Input Parameters:
3974 +  comm - MPI communicator
3975 .  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3981 .   i - row indices
3982 .   j - column indices
3983 -   a - matrix values
3984 
3985    Output Parameter:
3986 .   mat - the matrix
3987 
3988    Level: intermediate
3989 
3990    Notes:
3991        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3992      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3993      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3994 
       The i and j indices are 0 based, and the i indices are offsets into the local j (and v) arrays.
3996 
       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix the input data expected is
    as shown:
4000 
4001         1 0 0
4002         2 0 3     P0
4003        -------
4004         4 5 6     P1
4005 
4006      Process0 [P0]: rows_owned=[0,1]
4007         i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]
4010 
4011      Process1 [P1]: rows_owned=[2]
4012         i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
4015 
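   A minimal sketch of the call made on process 0 for the example above (comm is
   assumed to be a two-process communicator; the array and matrix names are illustrative):
.vb
   PetscInt    i[] = {0,1,3},j[] = {0,0,2};
   PetscScalar v[] = {1,2,3};
   Mat         A;
   MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
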
4016 .keywords: matrix, aij, compressed row, sparse, parallel
4017 
4018 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4019           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4020 @*/
4021 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4022 {
4023   PetscErrorCode ierr;
4024 
4025   PetscFunctionBegin;
4026   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4027   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4028   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4029   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4030   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4031   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4032   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4033   PetscFunctionReturn(0);
4034 }
4035 
4036 #undef __FUNCT__
4037 #define __FUNCT__ "MatCreateAIJ"
4038 /*@C
4039    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4040    (the default parallel PETSc format).  For good matrix assembly performance
4041    the user should preallocate the matrix storage by setting the parameters
4042    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4043    performance can be increased by more than a factor of 50.
4044 
4045    Collective on MPI_Comm
4046 
4047    Input Parameters:
4048 +  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4050            This value should be the same as the local size used in creating the
4051            y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4057 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4058            (same value is used for all local rows)
4059 .  d_nnz - array containing the number of nonzeros in the various rows of the
4060            DIAGONAL portion of the local submatrix (possibly different for each row)
4061            or NULL, if d_nz is used to specify the nonzero structure.
4062            The size of this array is equal to the number of local rows, i.e 'm'.
4063 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4064            submatrix (same value is used for all local rows).
4065 -  o_nnz - array containing the number of nonzeros in the various rows of the
4066            OFF-DIAGONAL portion of the local submatrix (possibly different for
4067            each row) or NULL, if o_nz is used to specify the nonzero
4068            structure. The size of this array is equal to the number
4069            of local rows, i.e 'm'.
4070 
4071    Output Parameter:
4072 .  A - the matrix
4073 
   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4077 
4078    Notes:
4079    If the *_nnz parameter is given then the *_nz parameter is ignored
4080 
4081    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4082    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4083    storage requirements for this matrix.
4084 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.
4088 
4089    The user MUST specify either the local or global matrix dimensions
4090    (possibly both).
4091 
4092    The parallel matrix is partitioned across processors such that the
4093    first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.
4097 
   The columns are logically partitioned with the first n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition etc., where n0,n1,n2,... are the input parameter 'n'.
4101 
   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   owned by the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.
4109 
4110    For a square global matrix we define each processor's diagonal portion
4111    to be its local rows and the corresponding columns (a square submatrix);
4112    each processor's off-diagonal portion encompasses the remainder of the
4113    local matrix (a rectangular submatrix).
4114 
   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4116 
4117    When calling this routine with a single process communicator, a matrix of
4118    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4119    type of communicator, use the construction mechanism:
4120      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4121 
4122    By default, this format uses inodes (identical nodes) when possible.
4123    We search for consecutive rows with the same nonzero structure, thereby
4124    reusing matrix information to achieve increased efficiency.
4125 
4126    Options Database Keys:
4127 +  -mat_no_inode  - Do not use inodes
4128 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4129 -  -mat_aij_oneindex - Internally use indexing starting at 1
4130         rather than 0.  Note that when calling MatSetValues(),
4131         the user still MUST index entries starting at 0!
4132 
4133 
4134    Example usage:
4135 
4136    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4139    as follows:
4140 
4141 .vb
4142             1  2  0  |  0  3  0  |  0  4
4143     Proc0   0  5  6  |  7  0  0  |  8  0
4144             9  0 10  | 11  0  0  | 12  0
4145     -------------------------------------
4146            13  0 14  | 15 16 17  |  0  0
4147     Proc1   0 18  0  | 19 20 21  |  0  0
4148             0  0  0  | 22 23  0  | 24  0
4149     -------------------------------------
4150     Proc2  25 26 27  |  0  0 28  | 29  0
4151            30  0  0  | 31 32 33  |  0 34
4152 .ve
4153 
4154    This can be represented as a collection of submatrices as:
4155 
4156 .vb
4157       A B C
4158       D E F
4159       G H I
4160 .ve
4161 
4162    Where the submatrices A,B,C are owned by proc0, D,E,F are
4163    owned by proc1, G,H,I are owned by proc2.
4164 
4165    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4166    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4167    The 'M','N' parameters are 8,8, and have the same values on all procs.
4168 
4169    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4170    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4171    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; for example, proc1 will store [E] as a SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.
4175 
   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices respectively.
4181    In this case, the values of d_nz,o_nz are:
4182 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
4186 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.
4191 
   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
4195 .vb
4196      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4197      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4198      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4199 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
4202 
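   A minimal sketch using constant per-row preallocation, where 3 and 4 are the
   per-row maxima from the example above (comm is an illustrative communicator name):
.vb
   Mat A;
   MatCreateAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,8,8,3,NULL,4,NULL,&A);
   /* insert entries with MatSetValues(), then assemble */
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
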
4203    Level: intermediate
4204 
4205 .keywords: matrix, aij, compressed row, sparse, parallel
4206 
4207 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4208           MPIAIJ, MatCreateMPIAIJWithArrays()
4209 @*/
4210 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4211 {
4212   PetscErrorCode ierr;
4213   PetscMPIInt    size;
4214 
4215   PetscFunctionBegin;
4216   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4217   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4218   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4219   if (size > 1) {
4220     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4221     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4222   } else {
4223     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4224     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4225   }
4226   PetscFunctionReturn(0);
4227 }
4228 
4229 #undef __FUNCT__
4230 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4231 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4232 {
4233   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4234 
4235   PetscFunctionBegin;
4236   *Ad     = a->A;
4237   *Ao     = a->B;
4238   *colmap = a->garray;
4239   PetscFunctionReturn(0);
4240 }
4241 
4242 #undef __FUNCT__
4243 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4244 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4245 {
4246   PetscErrorCode ierr;
4247   PetscInt       i;
4248   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4249 
4250   PetscFunctionBegin;
4251   if (coloring->ctype == IS_COLORING_GLOBAL) {
4252     ISColoringValue *allcolors,*colors;
4253     ISColoring      ocoloring;
4254 
4255     /* set coloring for diagonal portion */
4256     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4257 
4258     /* set coloring for off-diagonal portion */
4259     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4260     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4261     for (i=0; i<a->B->cmap->n; i++) {
4262       colors[i] = allcolors[a->garray[i]];
4263     }
4264     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4265     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4266     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4267     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4268   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4269     ISColoringValue *colors;
4270     PetscInt        *larray;
4271     ISColoring      ocoloring;
4272 
4273     /* set coloring for diagonal portion */
4274     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4275     for (i=0; i<a->A->cmap->n; i++) {
4276       larray[i] = i + A->cmap->rstart;
4277     }
4278     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4279     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4280     for (i=0; i<a->A->cmap->n; i++) {
4281       colors[i] = coloring->colors[larray[i]];
4282     }
4283     ierr = PetscFree(larray);CHKERRQ(ierr);
4284     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4285     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4286     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4287 
4288     /* set coloring for off-diagonal portion */
4289     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4290     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4291     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4292     for (i=0; i<a->B->cmap->n; i++) {
4293       colors[i] = coloring->colors[larray[i]];
4294     }
4295     ierr = PetscFree(larray);CHKERRQ(ierr);
4296     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4297     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4298     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4299   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4300   PetscFunctionReturn(0);
4301 }
4302 
4303 #undef __FUNCT__
4304 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4305 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4306 {
4307   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4308   PetscErrorCode ierr;
4309 
4310   PetscFunctionBegin;
4311   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4312   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4313   PetscFunctionReturn(0);
4314 }
4315 
4316 #undef __FUNCT__
4317 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4318 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4319 {
4320   PetscErrorCode ierr;
4321   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4322   PetscInt       *indx;
4323 
4324   PetscFunctionBegin;
4325   /* This routine will ONLY return MPIAIJ type matrix */
4326   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4327   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4328   if (n == PETSC_DECIDE) {
4329     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4330   }
4331   /* Check sum(n) = N */
4332   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4333   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4334 
4335   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4336   rstart -= m;
4337 
4338   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4339   for (i=0; i<m; i++) {
4340     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4341     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4342     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4343   }
4344 
4345   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4346   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4347   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4348   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4349   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4350   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4351   PetscFunctionReturn(0);
4352 }
4353 
4354 #undef __FUNCT__
4355 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4356 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4357 {
4358   PetscErrorCode ierr;
4359   PetscInt       m,N,i,rstart,nnz,Ii;
4360   PetscInt       *indx;
4361   PetscScalar    *values;
4362 
4363   PetscFunctionBegin;
4364   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4365   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4366   for (i=0; i<m; i++) {
4367     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4368     Ii   = i + rstart;
4369     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4370     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4371   }
4372   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4373   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4374   PetscFunctionReturn(0);
4375 }
4376 
4377 #undef __FUNCT__
4378 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4379 /*@
4380       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4381                  matrices from each processor
4382 
4383     Collective on MPI_Comm
4384 
4385    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process)
4388 .    n - number of local columns (or PETSC_DECIDE)
4389 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4390 
4391    Output Parameter:
4392 .    outmat - the parallel matrix generated
4393 
4394     Level: advanced
4395 
4396    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4397 
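   A minimal sketch (inmat here is a previously assembled SeqAIJ matrix held by each
   process; the output handle C is an illustrative name):
.vb
   Mat C;
   MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve
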
4398 @*/
4399 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4400 {
4401   PetscErrorCode ierr;
4402   PetscMPIInt    size;
4403 
4404   PetscFunctionBegin;
4405   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4406   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4407   if (size == 1) {
4408     if (scall == MAT_INITIAL_MATRIX) {
4409       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4410     } else {
4411       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4412     }
4413   } else {
4414     if (scall == MAT_INITIAL_MATRIX) {
4415       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4416     }
4417     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4418   }
4419   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4420   PetscFunctionReturn(0);
4421 }
4422 
4423 #undef __FUNCT__
4424 #define __FUNCT__ "MatFileSplit"
4425 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4426 {
4427   PetscErrorCode    ierr;
4428   PetscMPIInt       rank;
4429   PetscInt          m,N,i,rstart,nnz;
4430   size_t            len;
4431   const PetscInt    *indx;
4432   PetscViewer       out;
4433   char              *name;
4434   Mat               B;
4435   const PetscScalar *values;
4436 
4437   PetscFunctionBegin;
4438   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4439   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4440   /* Should this be the type of the diagonal block of A? */
4441   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4442   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4443   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4444   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4445   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4446   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4447   for (i=0; i<m; i++) {
4448     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4449     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4450     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4451   }
4452   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4453   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4454 
4455   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4456   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4457   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4458   sprintf(name,"%s.%d",outfile,rank);
4459   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4460   ierr = PetscFree(name);CHKERRQ(ierr);
4461   ierr = MatView(B,out);CHKERRQ(ierr);
4462   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4463   ierr = MatDestroy(&B);CHKERRQ(ierr);
4464   PetscFunctionReturn(0);
4465 }
4466 
4467 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4468 #undef __FUNCT__
4469 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4470 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4471 {
4472   PetscErrorCode      ierr;
4473   Mat_Merge_SeqsToMPI *merge;
4474   PetscContainer      container;
4475 
4476   PetscFunctionBegin;
4477   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4478   if (container) {
4479     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4480     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4481     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4482     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4483     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4484     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4485     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4486     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4487     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4488     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4489     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4490     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4491     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4492     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4493     ierr = PetscFree(merge);CHKERRQ(ierr);
4494     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4495   }
4496   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4497   PetscFunctionReturn(0);
4498 }
4499 
4500 #include <../src/mat/utils/freespace.h>
4501 #include <petscbt.h>
4502 
4503 #undef __FUNCT__
4504 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4505 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4506 {
4507   PetscErrorCode      ierr;
4508   MPI_Comm            comm;
4509   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4510   PetscMPIInt         size,rank,taga,*len_s;
4511   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4512   PetscInt            proc,m;
4513   PetscInt            **buf_ri,**buf_rj;
4514   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4515   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4516   MPI_Request         *s_waits,*r_waits;
4517   MPI_Status          *status;
4518   MatScalar           *aa=a->a;
4519   MatScalar           **abuf_r,*ba_i;
4520   Mat_Merge_SeqsToMPI *merge;
4521   PetscContainer      container;
4522 
4523   PetscFunctionBegin;
4524   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4525   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4526 
4527   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4528   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4529 
4530   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4531   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4532 
4533   bi     = merge->bi;
4534   bj     = merge->bj;
4535   buf_ri = merge->buf_ri;
4536   buf_rj = merge->buf_rj;
4537 
4538   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4539   owners = merge->rowmap->range;
4540   len_s  = merge->len_s;
4541 
4542   /* send and recv matrix values */
4543   /*-----------------------------*/
4544   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4545   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4546 
4547   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4548   for (proc=0,k=0; proc<size; proc++) {
4549     if (!len_s[proc]) continue;
4550     i    = owners[proc];
4551     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4552     k++;
4553   }
4554 
4555   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4556   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4557   ierr = PetscFree(status);CHKERRQ(ierr);
4558 
4559   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4560   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4561 
4562   /* insert mat values of mpimat */
4563   /*----------------------------*/
4564   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4565   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4566 
4567   for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4572   }
4573 
4574   /* set values of ba */
4575   m = merge->rowmap->n;
4576   for (i=0; i<m; i++) {
4577     arow = owners[rank] + i;
4578     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4579     bnzi = bi[i+1] - bi[i];
4580     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4581 
4582     /* add local non-zero vals of this proc's seqmat into ba */
4583     anzi   = ai[arow+1] - ai[arow];
4584     aj     = a->j + ai[arow];
4585     aa     = a->a + ai[arow];
4586     nextaj = 0;
4587     for (j=0; nextaj<anzi; j++) {
4588       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4589         ba_i[j] += aa[nextaj++];
4590       }
4591     }
4592 
4593     /* add received vals into ba */
4594     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4595       /* i-th row */
4596       if (i == *nextrow[k]) {
4597         anzi   = *(nextai[k]+1) - *nextai[k];
4598         aj     = buf_rj[k] + *(nextai[k]);
4599         aa     = abuf_r[k] + *(nextai[k]);
4600         nextaj = 0;
4601         for (j=0; nextaj<anzi; j++) {
4602           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4603             ba_i[j] += aa[nextaj++];
4604           }
4605         }
4606         nextrow[k]++; nextai[k]++;
4607       }
4608     }
4609     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4610   }
4611   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4612   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4613 
4614   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4615   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4616   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4617   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4618   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4619   PetscFunctionReturn(0);
4620 }
4621 
4622 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4623 
4624 #undef __FUNCT__
4625 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4626 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4627 {
4628   PetscErrorCode      ierr;
4629   Mat                 B_mpi;
4630   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4631   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4632   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4633   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4634   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4635   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4636   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4637   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4638   MPI_Status          *status;
4639   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4640   PetscBT             lnkbt;
4641   Mat_Merge_SeqsToMPI *merge;
4642   PetscContainer      container;
4643 
4644   PetscFunctionBegin;
4645   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4646 
4647   /* make sure it is a PETSc comm */
4648   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4649   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4650   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4651 
4652   ierr = PetscNew(&merge);CHKERRQ(ierr);
4653   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4654 
4655   /* determine row ownership */
4656   /*---------------------------------------------------------*/
4657   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4658   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4659   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4660   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4661   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4662   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4663   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4664 
4665   m      = merge->rowmap->n;
4666   owners = merge->rowmap->range;
4667 
4668   /* determine the number of messages to send, their lengths */
4669   /*---------------------------------------------------------*/
4670   len_s = merge->len_s;
4671 
4672   len          = 0; /* length of buf_si[] */
4673   merge->nsend = 0;
4674   for (proc=0; proc<size; proc++) {
4675     len_si[proc] = 0;
4676     if (proc == rank) {
4677       len_s[proc] = 0;
4678     } else {
4679       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4680       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4681     }
4682     if (len_s[proc]) {
4683       merge->nsend++;
4684       nrows = 0;
4685       for (i=owners[proc]; i<owners[proc+1]; i++) {
4686         if (ai[i+1] > ai[i]) nrows++;
4687       }
4688       len_si[proc] = 2*(nrows+1);
4689       len         += len_si[proc];
4690     }
4691   }
4692 
4693   /* determine the number and length of messages to receive for ij-structure */
4694   /*-------------------------------------------------------------------------*/
4695   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4696   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4697 
4698   /* post the Irecv of j-structure */
4699   /*-------------------------------*/
4700   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4701   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4702 
4703   /* post the Isend of j-structure */
4704   /*--------------------------------*/
4705   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4706 
4707   for (proc=0, k=0; proc<size; proc++) {
4708     if (!len_s[proc]) continue;
4709     i    = owners[proc];
4710     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4711     k++;
4712   }
4713 
4714   /* receives and sends of j-structure are complete */
4715   /*------------------------------------------------*/
4716   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4717   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4718 
4719   /* send and recv i-structure */
4720   /*---------------------------*/
4721   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4722   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4723 
4724   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4725   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4726   for (proc=0,k=0; proc<size; proc++) {
4727     if (!len_s[proc]) continue;
4728     /* form outgoing message for i-structure:
4729          buf_si[0]:                 nrows to be sent
4730                [1:nrows]:           row index (global)
4731                [nrows+1:2*nrows+1]: i-structure index
4732     */
4733     /*-------------------------------------------*/
4734     nrows       = len_si[proc]/2 - 1;
4735     buf_si_i    = buf_si + nrows+1;
4736     buf_si[0]   = nrows;
4737     buf_si_i[0] = 0;
4738     nrows       = 0;
4739     for (i=owners[proc]; i<owners[proc+1]; i++) {
4740       anzi = ai[i+1] - ai[i];
4741       if (anzi) {
4742         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4743         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4744         nrows++;
4745       }
4746     }
4747     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4748     k++;
4749     buf_si += len_si[proc];
4750   }
4751 
4752   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4753   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4754 
4755   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4756   for (i=0; i<merge->nrecv; i++) {
4757     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4758   }
4759 
4760   ierr = PetscFree(len_si);CHKERRQ(ierr);
4761   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4762   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4763   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4764   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4765   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4766   ierr = PetscFree(status);CHKERRQ(ierr);
4767 
4768   /* compute a local seq matrix in each processor */
4769   /*----------------------------------------------*/
4770   /* allocate bi array and free space for accumulating nonzero column info */
4771   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4772   bi[0] = 0;
4773 
4774   /* create and initialize a linked list */
4775   nlnk = N+1;
4776   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4777 
4778   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4779   len  = ai[owners[rank+1]] - ai[owners[rank]];
4780   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4781 
4782   current_space = free_space;
4783 
4784   /* determine symbolic info for each local row */
4785   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4786 
4787   for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4792   }
4793 
4794   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4795   len  = 0;
4796   for (i=0; i<m; i++) {
4797     bnzi = 0;
4798     /* add local non-zero cols of this proc's seqmat into lnk */
4799     arow  = owners[rank] + i;
4800     anzi  = ai[arow+1] - ai[arow];
4801     aj    = a->j + ai[arow];
4802     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4803     bnzi += nlnk;
4804     /* add received col data into lnk */
4805     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4806       if (i == *nextrow[k]) { /* i-th row */
4807         anzi  = *(nextai[k]+1) - *nextai[k];
4808         aj    = buf_rj[k] + *nextai[k];
4809         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4810         bnzi += nlnk;
4811         nextrow[k]++; nextai[k]++;
4812       }
4813     }
4814     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4815 
4816     /* if free space is not available, make more free space */
4817     if (current_space->local_remaining<bnzi) {
4818       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4819       nspacedouble++;
4820     }
4821     /* copy data into free space, then initialize lnk */
4822     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4823     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4824 
4825     current_space->array           += bnzi;
4826     current_space->local_used      += bnzi;
4827     current_space->local_remaining -= bnzi;
4828 
4829     bi[i+1] = bi[i] + bnzi;
4830   }
4831 
4832   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4833 
4834   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4835   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4836   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4837 
4838   /* create symbolic parallel matrix B_mpi */
4839   /*---------------------------------------*/
4840   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4841   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4842   if (n==PETSC_DECIDE) {
4843     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4844   } else {
4845     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4846   }
4847   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4848   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4849   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4850   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4851   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4852 
4853   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4854   B_mpi->assembled    = PETSC_FALSE;
4855   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4856   merge->bi           = bi;
4857   merge->bj           = bj;
4858   merge->buf_ri       = buf_ri;
4859   merge->buf_rj       = buf_rj;
4860   merge->coi          = NULL;
4861   merge->coj          = NULL;
4862   merge->owners_co    = NULL;
4863 
4864   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4865 
4866   /* attach the supporting struct to B_mpi for reuse */
4867   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4868   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4869   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4870   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4871   *mpimat = B_mpi;
4872 
4873   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4874   PetscFunctionReturn(0);
4875 }
4876 
4877 #undef __FUNCT__
4878 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4879 /*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
                 matrices from each processor
4882 
4883     Collective on MPI_Comm
4884 
4885    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
4888 .    m - number of local rows (or PETSC_DECIDE)
4889 .    n - number of local columns (or PETSC_DECIDE)
4890 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4891 
4892    Output Parameter:
4893 .    mpimat - the parallel matrix generated
4894 
4895     Level: advanced
4896 
4897    Notes:
4898      The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4900      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
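
   A minimal sketch (seqmat is a previously assembled SeqAIJ matrix held by each
   process; the output handle C is an illustrative name):
.vb
   Mat C;
   MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve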
4901 @*/
4902 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4903 {
4904   PetscErrorCode ierr;
4905   PetscMPIInt    size;
4906 
4907   PetscFunctionBegin;
4908   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4909   if (size == 1) {
4910     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4911     if (scall == MAT_INITIAL_MATRIX) {
4912       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4913     } else {
4914       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4915     }
4916     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4917     PetscFunctionReturn(0);
4918   }
4919   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4920   if (scall == MAT_INITIAL_MATRIX) {
4921     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4922   }
4923   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4924   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4925   PetscFunctionReturn(0);
4926 }
4927 
4928 #undef __FUNCT__
4929 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4930 /*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()
4934 
4935     Not Collective
4936 
4937    Input Parameters:
4938 +    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4940 
4941    Output Parameter:
4942 .    A_loc - the local sequential matrix generated
4943 
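   A minimal sketch (A is an assembled MPIAIJ matrix; the name Aloc is illustrative):
.vb
   Mat Aloc;
   MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
   /* ... use Aloc, then destroy it when no longer needed ... */
   MatDestroy(&Aloc);
.ve
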
4944     Level: developer
4945 
.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4947 
4948 @*/
4949 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4950 {
4951   PetscErrorCode ierr;
4952   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4953   Mat_SeqAIJ     *mat,*a,*b;
4954   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4955   MatScalar      *aa,*ba,*cam;
4956   PetscScalar    *ca;
4957   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4958   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4959   PetscBool      match;
4960 
4961   PetscFunctionBegin;
4962   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4963   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4964   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4965   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4966   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4967   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4968   aa = a->a; ba = b->a;
4969   if (scall == MAT_INITIAL_MATRIX) {
4970     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4971     ci[0] = 0;
4972     for (i=0; i<am; i++) {
4973       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4974     }
4975     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4976     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4977     k    = 0;
4978     for (i=0; i<am; i++) {
4979       ncols_o = bi[i+1] - bi[i];
4980       ncols_d = ai[i+1] - ai[i];
4981       /* off-diagonal portion of A */
4982       for (jo=0; jo<ncols_o; jo++) {
4983         col = cmap[*bj];
4984         if (col >= cstart) break;
4985         cj[k]   = col; bj++;
4986         ca[k++] = *ba++;
4987       }
4988       /* diagonal portion of A */
4989       for (j=0; j<ncols_d; j++) {
4990         cj[k]   = cstart + *aj++;
4991         ca[k++] = *aa++;
4992       }
4993       /* off-diagonal portion of A */
4994       for (j=jo; j<ncols_o; j++) {
4995         cj[k]   = cmap[*bj++];
4996         ca[k++] = *ba++;
4997       }
4998     }
4999     /* put together the new matrix */
5000     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5001     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5002     /* Since these are PETSc arrays, change flags to free them as necessary. */
5003     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5004     mat->free_a  = PETSC_TRUE;
5005     mat->free_ij = PETSC_TRUE;
5006     mat->nonew   = 0;
5007   } else if (scall == MAT_REUSE_MATRIX) {
5008     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5009     ci = mat->i; cj = mat->j; cam = mat->a;
5010     for (i=0; i<am; i++) {
5011       /* off-diagonal portion of A */
5012       ncols_o = bi[i+1] - bi[i];
5013       for (jo=0; jo<ncols_o; jo++) {
5014         col = cmap[*bj];
5015         if (col >= cstart) break;
5016         *cam++ = *ba++; bj++;
5017       }
5018       /* diagonal portion of A */
5019       ncols_d = ai[i+1] - ai[i];
5020       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5021       /* off-diagonal portion of A */
5022       for (j=jo; j<ncols_o; j++) {
5023         *cam++ = *ba++; bj++;
5024       }
5025     }
5026   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5027   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5028   PetscFunctionReturn(0);
5029 }
5030 
5031 #undef __FUNCT__
5032 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5033 /*@C
5034      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5035 
5036     Not Collective
5037 
5038    Input Parameters:
5039 +    A - the matrix
5040 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5041 -    row, col - index sets of rows and columns to extract (or NULL)
5042 
5043    Output Parameter:
5044 .    A_loc - the local sequential matrix generated
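   A minimal sketch, letting the routine pick all local rows and the nonzero columns
   (A is an assembled MPIAIJ matrix; the name Aloc is illustrative):
.vb
   Mat Aloc;
   MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);
.ve
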
5045 
5046     Level: developer
5047 
5048 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5049 
5050 @*/
5051 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5052 {
5053   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5054   PetscErrorCode ierr;
5055   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5056   IS             isrowa,iscola;
5057   Mat            *aloc;
5058   PetscBool      match;
5059 
5060   PetscFunctionBegin;
5061   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5062   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5063   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5064   if (!row) {
5065     start = A->rmap->rstart; end = A->rmap->rend;
5066     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5067   } else {
5068     isrowa = *row;
5069   }
5070   if (!col) {
5071     start = A->cmap->rstart;
5072     cmap  = a->garray;
5073     nzA   = a->A->cmap->n;
5074     nzB   = a->B->cmap->n;
5075     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5076     ncols = 0;
5077     for (i=0; i<nzB; i++) {
5078       if (cmap[i] < start) idx[ncols++] = cmap[i];
5079       else break;
5080     }
5081     imark = i;
5082     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5083     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5084     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5085   } else {
5086     iscola = *col;
5087   }
5088   if (scall != MAT_INITIAL_MATRIX) {
5089     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5090     aloc[0] = *A_loc;
5091   }
5092   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5093   *A_loc = aloc[0];
5094   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5095   if (!row) {
5096     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5097   }
5098   if (!col) {
5099     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5100   }
5101   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5102   PetscFunctionReturn(0);
5103 }
5104 
5105 #undef __FUNCT__
5106 #define __FUNCT__ "MatGetBrowsOfAcols"
5107 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5109 
5110     Collective on Mat
5111 
5112    Input Parameters:
5113 +    A,B - the matrices in mpiaij format
5114 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5115 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5116 
5117    Output Parameter:
5118 +    rowb, colb - index sets of rows and columns of B to extract
5119 -    B_seq - the sequential matrix generated
5120 
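   A minimal sketch (A and B are assembled MPIAIJ matrices; rowb, colb, and Bseq are
   illustrative names; the returned index sets can be passed back with MAT_REUSE_MATRIX):
.vb
   IS  rowb,colb;
   Mat Bseq;
   MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);
.ve
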
5121     Level: developer
5122 
5123 @*/
5124 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5125 {
5126   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5127   PetscErrorCode ierr;
5128   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5129   IS             isrowb,iscolb;
5130   Mat            *bseq=NULL;
5131 
5132   PetscFunctionBegin;
5133   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5134     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5135   }
5136   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5137 
5138   if (scall == MAT_INITIAL_MATRIX) {
5139     start = A->cmap->rstart;
5140     cmap  = a->garray;
5141     nzA   = a->A->cmap->n;
5142     nzB   = a->B->cmap->n;
5143     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5144     ncols = 0;
5145     for (i=0; i<nzB; i++) {  /* row < local row index */
5146       if (cmap[i] < start) idx[ncols++] = cmap[i];
5147       else break;
5148     }
5149     imark = i;
5150     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5151     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5152     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5153     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5154   } else {
5155     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5156     isrowb  = *rowb; iscolb = *colb;
5157     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5158     bseq[0] = *B_seq;
5159   }
5160   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5161   *B_seq = bseq[0];
5162   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5163   if (!rowb) {
5164     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5165   } else {
5166     *rowb = isrowb;
5167   }
5168   if (!colb) {
5169     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5170   } else {
5171     *colb = iscolb;
5172   }
5173   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5174   PetscFunctionReturn(0);
5175 }
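
/*
   Illustrative usage sketch for MatGetBrowsOfAcols() (not compiled as part of this file).
   It assumes an enclosing function with a PetscErrorCode ierr and assembled MPIAIJ
   matrices A and B with compatible layouts; the names rowb, colb, and B_seq are
   hypothetical.  The index sets created by the initial call must be passed back for the
   MAT_REUSE_MATRIX call and destroyed by the caller.

     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
         ... after the numerical values of B change (same nonzero pattern) ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/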
5176 
5177 #undef __FUNCT__
5178 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5179 /*
5180     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5181     of the OFF-DIAGONAL portion of the local A
5182 
5183     Collective on Mat
5184 
5185    Input Parameters:
5186 +    A,B - the matrices in mpiaij format
5187 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5188 
5189    Output Parameters:
5190 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5191 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5192 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5193 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5194 
5195     Level: developer
5196 
5197 */
5198 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5199 {
5200   VecScatter_MPI_General *gen_to,*gen_from;
5201   PetscErrorCode         ierr;
5202   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5203   Mat_SeqAIJ             *b_oth;
5204   VecScatter             ctx =a->Mvctx;
5205   MPI_Comm               comm;
5206   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5207   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5208   PetscScalar            *rvalues,*svalues;
5209   MatScalar              *b_otha,*bufa,*bufA;
5210   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5211   MPI_Request            *rwaits = NULL,*swaits = NULL;
5212   MPI_Status             *sstatus,rstatus;
5213   PetscMPIInt            jj;
5214   PetscInt               *cols,sbs,rbs;
5215   PetscScalar            *vals;
5216 
5217   PetscFunctionBegin;
5218   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5219   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5220     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5221   }
5222   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5223   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5224 
5225   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5226   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5227   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
5228   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
5229   nrecvs   = gen_from->n;
5230   nsends   = gen_to->n;
5231 
5232   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5233   srow    = gen_to->indices;    /* local row index to be sent */
5234   sstarts = gen_to->starts;
5235   sprocs  = gen_to->procs;
5236   sstatus = gen_to->sstatus;
5237   sbs     = gen_to->bs;
5238   rstarts = gen_from->starts;
5239   rprocs  = gen_from->procs;
5240   rbs     = gen_from->bs;
5241 
5242   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5243   if (scall == MAT_INITIAL_MATRIX) {
5244     /* i-array */
5245     /*---------*/
5246     /*  post receives */
5247     for (i=0; i<nrecvs; i++) {
5248       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5249       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5250       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5251     }
5252 
5253     /* pack the outgoing message */
5254     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5255 
5256     sstartsj[0] = 0;
5257     rstartsj[0] = 0;
5258     len         = 0; /* total length of j or a array to be sent */
5259     k           = 0;
5260     for (i=0; i<nsends; i++) {
5261       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5262       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5263       for (j=0; j<nrows; j++) {
5264         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5265         for (l=0; l<sbs; l++) {
5266           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5267 
5268           rowlen[j*sbs+l] = ncols;
5269 
5270           len += ncols;
5271           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5272         }
5273         k++;
5274       }
5275       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5276 
5277       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5278     }
5279     /* recvs and sends of i-array are completed */
5280     i = nrecvs;
5281     while (i--) {
5282       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5283     }
5284     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5285 
5286     /* allocate buffers for sending j and a arrays */
5287     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5288     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5289 
5290     /* create i-array of B_oth */
5291     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5292 
5293     b_othi[0] = 0;
5294     len       = 0; /* total length of j or a array to be received */
5295     k         = 0;
5296     for (i=0; i<nrecvs; i++) {
5297       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5298       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5299       for (j=0; j<nrows; j++) {
5300         b_othi[k+1] = b_othi[k] + rowlen[j];
5301         len        += rowlen[j]; k++;
5302       }
5303       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5304     }
5305 
5306     /* allocate space for j and a arrays of B_oth */
5307     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5308     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5309 
5310     /* j-array */
5311     /*---------*/
5312     /*  post receives of j-array */
5313     for (i=0; i<nrecvs; i++) {
5314       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5315       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5316     }
5317 
5318     /* pack the outgoing message j-array */
5319     k = 0;
5320     for (i=0; i<nsends; i++) {
5321       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5322       bufJ  = bufj+sstartsj[i];
5323       for (j=0; j<nrows; j++) {
5324         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5325         for (ll=0; ll<sbs; ll++) {
5326           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5327           for (l=0; l<ncols; l++) {
5328             *bufJ++ = cols[l];
5329           }
5330           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5331         }
5332       }
5333       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5334     }
5335 
5336     /* recvs and sends of j-array are completed */
5337     i = nrecvs;
5338     while (i--) {
5339       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5340     }
5341     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5342   } else if (scall == MAT_REUSE_MATRIX) {
5343     sstartsj = *startsj_s;
5344     rstartsj = *startsj_r;
5345     bufa     = *bufa_ptr;
5346     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5347     b_otha   = b_oth->a;
5348   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatReuse argument must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5349 
5350   /* a-array */
5351   /*---------*/
5352   /*  post receives of a-array */
5353   for (i=0; i<nrecvs; i++) {
5354     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5355     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5356   }
5357 
5358   /* pack the outgoing message a-array */
5359   k = 0;
5360   for (i=0; i<nsends; i++) {
5361     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5362     bufA  = bufa+sstartsj[i];
5363     for (j=0; j<nrows; j++) {
5364       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5365       for (ll=0; ll<sbs; ll++) {
5366         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5367         for (l=0; l<ncols; l++) {
5368           *bufA++ = vals[l];
5369         }
5370         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5371       }
5372     }
5373     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5374   }
5375   /* recvs and sends of a-array are completed */
5376   i = nrecvs;
5377   while (i--) {
5378     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5379   }
5380   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5381   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5382 
5383   if (scall == MAT_INITIAL_MATRIX) {
5384     /* put together the new matrix */
5385     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5386 
5387     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5388     /* Since these are PETSc arrays, change flags to free them as necessary. */
5389     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5390     b_oth->free_a  = PETSC_TRUE;
5391     b_oth->free_ij = PETSC_TRUE;
5392     b_oth->nonew   = 0;
5393 
5394     ierr = PetscFree(bufj);CHKERRQ(ierr);
5395     if (!startsj_s || !bufa_ptr) {
5396       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5397       ierr = PetscFree(bufa);CHKERRQ(ierr); /* the send buffer is not returned to the caller in this case */
5398     } else {
5399       *startsj_s = sstartsj;
5400       *startsj_r = rstartsj;
5401       *bufa_ptr  = bufa;
5402     }
5403   }
5404   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5405   PetscFunctionReturn(0);
5406 }
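
/*
   Illustrative usage sketch for MatGetBrowsOfAoCols_MPIAIJ() (not compiled as part of this
   file).  It assumes an enclosing function with a PetscErrorCode ierr and assembled MPIAIJ
   matrices A and B with compatible layouts; the names startsj_s, startsj_r, bufa, and B_oth
   are hypothetical.  The buffers returned by the initial call are reused by the
   MAT_REUSE_MATRIX call and must eventually be freed by the caller.

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
         ... after the numerical values of B change (same nonzero pattern) ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/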
5407 
5408 #undef __FUNCT__
5409 #define __FUNCT__ "MatGetCommunicationStructs"
5410 /*@C
5411   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5412 
5413   Not Collective
5414 
5415   Input Parameter:
5416 . A - The matrix in mpiaij format
5417 
5418   Output Parameters:
5419 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5420 . colmap - A map from global column index to local index into lvec
5421 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5422 
5423   Level: developer
5424 
5425 @*/
5426 #if defined(PETSC_USE_CTABLE)
5427 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5428 #else
5429 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5430 #endif
5431 {
5432   Mat_MPIAIJ *a;
5433 
5434   PetscFunctionBegin;
5435   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5436   PetscValidPointer(lvec, 2);
5437   PetscValidPointer(colmap, 3);
5438   PetscValidPointer(multScatter, 4);
5439   a = (Mat_MPIAIJ*) A->data;
5440   if (lvec) *lvec = a->lvec;
5441   if (colmap) *colmap = a->colmap;
5442   if (multScatter) *multScatter = a->Mvctx;
5443   PetscFunctionReturn(0);
5444 }
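
/*
   Illustrative usage sketch for MatGetCommunicationStructs() (not compiled as part of this
   file).  It assumes an enclosing function with a PetscErrorCode ierr and an assembled
   MPIAIJ matrix A; the names lvec, colmap, and Mvctx are hypothetical.  The returned objects
   are borrowed references owned by A and must not be destroyed by the caller.

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
         ... inspect lvec, colmap, Mvctx ...
*/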
5445 
5446 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5447 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5448 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5449 
5450 #undef __FUNCT__
5451 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5452 /*
5453     Computes C = A*B as (B'*A')' since computing the MPIDense*MPIAIJ product A*B directly is untenable
5454 
5455                n                       p                          p
5456         (              )       (              )         (                  )
5457       m (      A       )  *  n (       B      )   =   m (         C        )
5458         (              )       (              )         (                  )
5459 
5460 */
5461 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5462 {
5463   PetscErrorCode ierr;
5464   Mat            At,Bt,Ct;
5465 
5466   PetscFunctionBegin;
5467   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5468   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5469   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5470   ierr = MatDestroy(&At);CHKERRQ(ierr);
5471   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5472   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5473   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5474   PetscFunctionReturn(0);
5475 }
5476 
5477 #undef __FUNCT__
5478 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5479 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5480 {
5481   PetscErrorCode ierr;
5482   PetscInt       m=A->rmap->n,n=B->cmap->n;
5483   Mat            Cmat;
5484 
5485   PetscFunctionBegin;
5486   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5487   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5488   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5489   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5490   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5491   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5492   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5493   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5494 
5495   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5496 
5497   *C = Cmat;
5498   PetscFunctionReturn(0);
5499 }
5500 
5501 /* ----------------------------------------------------------------*/
5502 #undef __FUNCT__
5503 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5504 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5505 {
5506   PetscErrorCode ierr;
5507 
5508   PetscFunctionBegin;
5509   if (scall == MAT_INITIAL_MATRIX) {
5510     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5511     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5512     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5513   }
5514   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5515   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5516   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5517   PetscFunctionReturn(0);
5518 }
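
/*
   Illustrative user-level sketch of the product these routines implement (not compiled as
   part of this file).  It assumes an enclosing function with a PetscErrorCode ierr, an
   assembled MPIDENSE matrix A, and an MPIAIJ matrix B with A->cmap->n == B->rmap->n; the
   name C is hypothetical.

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
         ... after the numerical values of A or B change ...
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/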
5519 
5520 #if defined(PETSC_HAVE_MUMPS)
5521 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5522 #endif
5523 #if defined(PETSC_HAVE_PASTIX)
5524 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5525 #endif
5526 #if defined(PETSC_HAVE_SUPERLU_DIST)
5527 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5528 #endif
5529 #if defined(PETSC_HAVE_CLIQUE)
5530 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5531 #endif
5532 
5533 /*MC
5534    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5535 
5536    Options Database Keys:
5537 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5538 
5539   Level: beginner
5540 
5541 .seealso: MatCreateAIJ()
5542 M*/
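
/*
   Illustrative creation sketch for a MATMPIAIJ matrix (not compiled as part of this file).
   It assumes an enclosing function with a PetscErrorCode ierr and hypothetical sizes
   m, n, M, N and preallocation values d_nz and o_nz.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
         ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/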
5543 
5544 #undef __FUNCT__
5545 #define __FUNCT__ "MatCreate_MPIAIJ"
5546 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5547 {
5548   Mat_MPIAIJ     *b;
5549   PetscErrorCode ierr;
5550   PetscMPIInt    size;
5551 
5552   PetscFunctionBegin;
5553   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5554 
5555   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5556   B->data       = (void*)b;
5557   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5558   B->assembled  = PETSC_FALSE;
5559   B->insertmode = NOT_SET_VALUES;
5560   b->size       = size;
5561 
5562   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5563 
5564   /* build cache for off array entries formed */
5565   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5566 
5567   b->donotstash  = PETSC_FALSE;
5568   b->colmap      = 0;
5569   b->garray      = 0;
5570   b->roworiented = PETSC_TRUE;
5571 
5572   /* stuff used for matrix vector multiply */
5573   b->lvec  = NULL;
5574   b->Mvctx = NULL;
5575 
5576   /* stuff for MatGetRow() */
5577   b->rowindices   = 0;
5578   b->rowvalues    = 0;
5579   b->getrowactive = PETSC_FALSE;
5580 
5581   /* flexible pointer used in CUSP/CUSPARSE classes */
5582   b->spptr = NULL;
5583 
5584 #if defined(PETSC_HAVE_MUMPS)
5585   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5586 #endif
5587 #if defined(PETSC_HAVE_PASTIX)
5588   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5589 #endif
5590 #if defined(PETSC_HAVE_SUPERLU_DIST)
5591   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5592 #endif
5593 #if defined(PETSC_HAVE_CLIQUE)
5594   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5595 #endif
5596   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5597   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5598   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5599   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5602   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5604   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5605   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5606   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5607   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5608   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5609   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5610   PetscFunctionReturn(0);
5611 }
5612 
5613 #undef __FUNCT__
5614 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5615 /*@
5616      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5617          and "off-diagonal" part of the matrix in CSR format.
5618 
5619    Collective on MPI_Comm
5620 
5621    Input Parameters:
5622 +  comm - MPI communicator
5623 .  m - number of local rows (Cannot be PETSC_DECIDE)
5624 .  n - This value should be the same as the local size used in creating the
5625        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5626        it calculated if N is given). For square matrices n is almost always m.
5627 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5628 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5629 .   i - row indices for "diagonal" portion of matrix
5630 .   j - column indices
5631 .   a - matrix values
5632 .   oi - row indices for "off-diagonal" portion of matrix
5633 .   oj - column indices
5634 -   oa - matrix values
5635 
5636    Output Parameter:
5637 .   mat - the matrix
5638 
5639    Level: advanced
5640 
5641    Notes:
5642        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5643        must free the arrays once the matrix has been destroyed and not before.
5644 
5645        The i and j indices are 0 based
5646 
5647        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5648 
5649        This sets local rows and cannot be used to set off-processor values.
5650 
5651        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5652        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5653        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5654        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5655        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5656        communication if it is known that only local entries will be set.
5657 
5658 .keywords: matrix, aij, compressed row, sparse, parallel
5659 
5660 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5661           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5662 @*/
5663 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5664 {
5665   PetscErrorCode ierr;
5666   Mat_MPIAIJ     *maij;
5667 
5668   PetscFunctionBegin;
5669   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5670   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5671   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5672   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5673   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5674   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5675   maij = (Mat_MPIAIJ*) (*mat)->data;
5676 
5677   (*mat)->preallocated = PETSC_TRUE;
5678 
5679   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5680   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5681 
5682   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5683   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5684 
5685   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5686   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5687   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5688   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5689 
5690   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5691   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5692   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5693   PetscFunctionReturn(0);
5694 }
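
/*
   Illustrative usage sketch for MatCreateMPIAIJWithSplitArrays() (not compiled as part of
   this file).  It assumes an enclosing function with a PetscErrorCode ierr and that the
   application already holds the local "diagonal" CSR arrays (i,j,a) and "off-diagonal" CSR
   arrays (oi,oj,oa) in the layout described above; m and n are the hypothetical local sizes.

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
         ... use A; the six arrays must remain valid until A is destroyed ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/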
5695 
5696 /*
5697     Special version for direct calls from Fortran
5698 */
5699 #include <petsc-private/fortranimpl.h>
5700 
5701 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5702 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5703 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5704 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5705 #endif
5706 
5707 /* Change these macros so can be used in void function */
5708 #undef CHKERRQ
5709 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5710 #undef SETERRQ2
5711 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5712 #undef SETERRQ3
5713 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5714 #undef SETERRQ
5715 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5716 
5717 #undef __FUNCT__
5718 #define __FUNCT__ "matsetvaluesmpiaij_"
5719 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5720 {
5721   Mat            mat  = *mmat;
5722   PetscInt       m    = *mm, n = *mn;
5723   InsertMode     addv = *maddv;
5724   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5725   PetscScalar    value;
5726   PetscErrorCode ierr;
5727 
5728   MatCheckPreallocated(mat,1);
5729   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5730 
5731 #if defined(PETSC_USE_DEBUG)
5732   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5733 #endif
5734   {
5735     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5736     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5737     PetscBool roworiented = aij->roworiented;
5738 
5739     /* Some Variables required in the macro */
5740     Mat        A                 = aij->A;
5741     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5742     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5743     MatScalar  *aa               = a->a;
5744     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5745     Mat        B                 = aij->B;
5746     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5747     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5748     MatScalar  *ba               = b->a;
5749 
5750     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5751     PetscInt  nonew = a->nonew;
5752     MatScalar *ap1,*ap2;
5753 
5754     PetscFunctionBegin;
5755     for (i=0; i<m; i++) {
5756       if (im[i] < 0) continue;
5757 #if defined(PETSC_USE_DEBUG)
5758       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5759 #endif
5760       if (im[i] >= rstart && im[i] < rend) {
5761         row      = im[i] - rstart;
5762         lastcol1 = -1;
5763         rp1      = aj + ai[row];
5764         ap1      = aa + ai[row];
5765         rmax1    = aimax[row];
5766         nrow1    = ailen[row];
5767         low1     = 0;
5768         high1    = nrow1;
5769         lastcol2 = -1;
5770         rp2      = bj + bi[row];
5771         ap2      = ba + bi[row];
5772         rmax2    = bimax[row];
5773         nrow2    = bilen[row];
5774         low2     = 0;
5775         high2    = nrow2;
5776 
5777         for (j=0; j<n; j++) {
5778           if (roworiented) value = v[i*n+j];
5779           else value = v[i+j*m];
5780           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5781           if (in[j] >= cstart && in[j] < cend) {
5782             col = in[j] - cstart;
5783             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5784           } else if (in[j] < 0) continue;
5785 #if defined(PETSC_USE_DEBUG)
5786           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5787 #endif
5788           else {
5789             if (mat->was_assembled) {
5790               if (!aij->colmap) {
5791                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5792               }
5793 #if defined(PETSC_USE_CTABLE)
5794               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5795               col--;
5796 #else
5797               col = aij->colmap[in[j]] - 1;
5798 #endif
5799               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5800                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5801                 col  =  in[j];
5802                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5803                 B     = aij->B;
5804                 b     = (Mat_SeqAIJ*)B->data;
5805                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5806                 rp2   = bj + bi[row];
5807                 ap2   = ba + bi[row];
5808                 rmax2 = bimax[row];
5809                 nrow2 = bilen[row];
5810                 low2  = 0;
5811                 high2 = nrow2;
5812                 bm    = aij->B->rmap->n;
5813                 ba    = b->a;
5814               }
5815             } else col = in[j];
5816             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5817           }
5818         }
5819       } else if (!aij->donotstash) {
5820         if (roworiented) {
5821           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5822         } else {
5823           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5824         }
5825       }
5826     }
5827   }
5828   PetscFunctionReturnVoid();
5829 }
5830 
5831