xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 1f0e42cf17e8fdc56355134e13a5edcdb6dc1c19)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also automatically switches over to use inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
24 
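/*
   Typical creation sequence following the advice above (a sketch; the
   preallocation counts 5 and 2 are placeholders for application-specific
   estimates of the number of nonzeros per row):

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   The preallocation call that does not apply to the actual type is ignored,
   which is why calling both is recommended above.
*/
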
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
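/*
   Sketch of selecting this format at run time instead of hard-coding it
   (run the program with -mat_type aijcrl; preallocation counts are placeholders):

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/
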
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
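/*
   Usage sketch for MatFindNonZeroRows_MPIAIJ(), assuming it is reached through
   the generic MatFindNonzeroRows() interface with the same calling sequence;
   keptrows is left at NULL when every row of the matrix contains a nonzero value:

      IS keptrows;
      ierr = MatFindNonzeroRows(A,&keptrows);CHKERRQ(ierr);
      if (keptrows) {
        ierr = ISView(keptrows,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
        ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
      }
*/
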
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
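/*
   Usage sketch for MatGetColumnNorms(), which dispatches to
   MatGetColumnNorms_MPIAIJ() above; every process receives the norms of all N
   global columns (assumes A is an assembled parallel AIJ matrix):

      PetscReal *norms;
      PetscInt  N;
      ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
      ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
      ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
      ierr = PetscFree(norms);CHKERRQ(ierr);
*/
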
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
187     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
188     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
189     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
190     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
191     rowners[0] = 0;
192     for (i=2; i<=size; i++) {
193       rowners[i] += rowners[i-1];
194     }
195     rstart = rowners[rank];
196     rend   = rowners[rank+1];
197     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
198     if (!rank) {
199       gmata = (Mat_SeqAIJ*) gmat->data;
200       /* send row lengths to all processors */
201       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
202       for (i=1; i<size; i++) {
203 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
204       }
205       /* determine the diagonal and off-diagonal nonzero counts for each row */
206       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
207       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
208       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
209       jj = 0;
210       for (i=0; i<m; i++) {
211 	for (j=0; j<dlens[i]; j++) {
212           if (gmata->j[jj] < rstart) ld[i]++;
213 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
214 	  jj++;
215 	}
216       }
217       /* send column indices to other processes */
218       for (i=1; i<size; i++) {
219 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
220 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
222       }
223 
224       /* send numerical values to other processes */
225       for (i=1; i<size; i++) {
226         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
227         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
228       }
229       gmataa = gmata->a;
230       gmataj = gmata->j;
231 
232     } else {
233       /* receive row lengths */
234       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
235       /* receive column indices */
236       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
237       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
238       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
239       /* determine the diagonal and off-diagonal nonzero counts for each row */
240       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
241       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
242       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
243       jj = 0;
244       for (i=0; i<m; i++) {
245 	for (j=0; j<dlens[i]; j++) {
246           if (gmataj[jj] < rstart) ld[i]++;
247 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
248 	  jj++;
249 	}
250       }
251       /* receive numerical values */
252       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
253       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
254     }
255     /* set preallocation */
256     for (i=0; i<m; i++) {
257       dlens[i] -= olens[i];
258     }
259     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
260     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
261 
262     for (i=0; i<m; i++) {
263       dlens[i] += olens[i];
264     }
265     cnt  = 0;
266     for (i=0; i<m; i++) {
267       row  = rstart + i;
268       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
269       cnt += dlens[i];
270     }
271     if (rank) {
272       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
273     }
274     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
275     ierr = PetscFree(rowners);CHKERRQ(ierr);
276     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
277     *inmat = mat;
278   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
279     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
280     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
281     mat   = *inmat;
282     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
283     if (!rank) {
284       /* send numerical values to other processes */
285       gmata = (Mat_SeqAIJ*) gmat->data;
286       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
287       gmataa = gmata->a;
288       for (i=1; i<size; i++) {
289         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
290         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
291       }
292       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
293     } else {
294       /* receive numerical values from process 0*/
295       nz   = Ad->nz + Ao->nz;
296       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
297       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
298     }
299     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
300     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
301     ad = Ad->a;
302     ao = Ao->a;
303     if (mat->rmap->n) {
304       i  = 0;
305       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
306       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
307     }
308     for (i=1; i<mat->rmap->n; i++) {
309       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
310       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
311     }
312     i--;
313     if (mat->rmap->n) {
314       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
315     }
316     if (rank) {
317       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
318     }
319   }
320   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
322   CHKMEMQ;
323   PetscFunctionReturn(0);
324 }
325 
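/*
   Rough usage sketch for MatDistribute_MPIAIJ() above (assumptions: each
   process passes its local row count m, the counts sum to the global size, and
   gmat is a square SeqAIJ matrix whose numerical values matter only on process 0):

      Mat dist;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
      ... update the values in gmat on process 0, keeping the same nonzero pattern ...
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call only moves the new numerical values from process 0
   into the already-distributed matrix.
*/
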
326 /*
327   Local utility routine that creates a mapping from the global column
328 number to the local number in the off-diagonal part of the local
329 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
330 a slightly higher hash-table cost; without it, it is not scalable (each process
331 holds an order-N integer array) but access is fast.
332 */
333 #undef __FUNCT__
334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
336 {
337   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
338   PetscErrorCode ierr;
339   PetscInt       n = aij->B->cmap->n,i;
340 
341   PetscFunctionBegin;
342   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
343 #if defined (PETSC_USE_CTABLE)
344   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
345   for (i=0; i<n; i++){
346     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
347   }
348 #else
349   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
350   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
351   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
352   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
353 #endif
354   PetscFunctionReturn(0);
355 }
356 
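/*
   Concrete example of the colmap built above, assuming the off-diagonal block
   on this process references the global columns garray[] = {3,7,12}:

      with PETSC_USE_CTABLE:     PetscTableFind(aij->colmap,7+1,&col) sets col = 2
      without PETSC_USE_CTABLE:  aij->colmap[7] == 2

   so either way the local column index of global column 7 is col-1 = 1, and a
   result of 0 means the global column does not appear in the off-diagonal part
   (which is why everything is shifted by one).
*/
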
357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
358 { \
359     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
360     lastcol1 = col;\
361     while (high1-low1 > 5) { \
362       t = (low1+high1)/2; \
363       if (rp1[t] > col) high1 = t; \
364       else             low1  = t; \
365     } \
366       for (_i=low1; _i<high1; _i++) { \
367         if (rp1[_i] > col) break; \
368         if (rp1[_i] == col) { \
369           if (addv == ADD_VALUES) ap1[_i] += value;   \
370           else                    ap1[_i] = value; \
371           goto a_noinsert; \
372         } \
373       }  \
374       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
375       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
376       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
377       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
378       N = nrow1++ - 1; a->nz++; high1++; \
379       /* shift up all the later entries in this row */ \
380       for (ii=N; ii>=_i; ii--) { \
381         rp1[ii+1] = rp1[ii]; \
382         ap1[ii+1] = ap1[ii]; \
383       } \
384       rp1[_i] = col;  \
385       ap1[_i] = value;  \
386       a_noinsert: ; \
387       ailen[row] = nrow1; \
388 }
389 
390 
391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
392 { \
393     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
394     lastcol2 = col;\
395     while (high2-low2 > 5) { \
396       t = (low2+high2)/2; \
397       if (rp2[t] > col) high2 = t; \
398       else             low2  = t; \
399     } \
400     for (_i=low2; _i<high2; _i++) {		\
401       if (rp2[_i] > col) break;			\
402       if (rp2[_i] == col) {			      \
403 	if (addv == ADD_VALUES) ap2[_i] += value;     \
404 	else                    ap2[_i] = value;      \
405 	goto b_noinsert;			      \
406       }						      \
407     }							      \
408     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
409     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
410     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
411     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
412     N = nrow2++ - 1; b->nz++; high2++;					\
413     /* shift up all the later entries in this row */			\
414     for (ii=N; ii>=_i; ii--) {						\
415       rp2[ii+1] = rp2[ii];						\
416       ap2[ii+1] = ap2[ii];						\
417     }									\
418     rp2[_i] = col;							\
419     ap2[_i] = value;							\
420     b_noinsert: ;								\
421     bilen[row] = nrow2;							\
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
427 {
428   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
429   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
430   PetscErrorCode ierr;
431   PetscInt       l,*garray = mat->garray,diag;
432 
433   PetscFunctionBegin;
434   /* code only works for square matrices A */
435 
436   /* find size of row to the left of the diagonal part */
437   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
438   row  = row - diag;
439   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
440     if (garray[b->j[b->i[row]+l]] > diag) break;
441   }
442   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
443 
444   /* diagonal part */
445   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
446 
447   /* right of diagonal part */
448   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
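/*
   Layout assumed by MatSetValuesRow_MPIAIJ() above: v[] holds the complete row
   ordered by global column and, with [diag, rend) denoting this process's row
   ownership range, is split as

      v = [ columns < diag | diag <= columns < rend | columns >= rend ]
              -> B (left)       -> A (diagonal)        -> B (right)

   where l, computed in the first loop, counts the off-diagonal entries to the
   left of the ownership range.
*/
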
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValues_MPIAIJ"
454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscScalar    value;
458   PetscErrorCode ierr;
459   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
460   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
461   PetscBool      roworiented = aij->roworiented;
462 
463   /* Some Variables required in the macro */
464   Mat            A = aij->A;
465   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
466   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
467   MatScalar      *aa = a->a;
468   PetscBool      ignorezeroentries = a->ignorezeroentries;
469   Mat            B = aij->B;
470   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
471   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
472   MatScalar      *ba = b->a;
473 
474   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
475   PetscInt       nonew;
476   MatScalar      *ap1,*ap2;
477 
478   PetscFunctionBegin;
479   if (v) PetscValidScalarPointer(v,6);
480   for (i=0; i<m; i++) {
481     if (im[i] < 0) continue;
482 #if defined(PETSC_USE_DEBUG)
483     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
484 #endif
485     if (im[i] >= rstart && im[i] < rend) {
486       row      = im[i] - rstart;
487       lastcol1 = -1;
488       rp1      = aj + ai[row];
489       ap1      = aa + ai[row];
490       rmax1    = aimax[row];
491       nrow1    = ailen[row];
492       low1     = 0;
493       high1    = nrow1;
494       lastcol2 = -1;
495       rp2      = bj + bi[row];
496       ap2      = ba + bi[row];
497       rmax2    = bimax[row];
498       nrow2    = bilen[row];
499       low2     = 0;
500       high2    = nrow2;
501 
502       for (j=0; j<n; j++) {
503         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
504         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
505         if (in[j] >= cstart && in[j] < cend){
506           col = in[j] - cstart;
507           nonew = a->nonew;
508           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
509         } else if (in[j] < 0) continue;
510 #if defined(PETSC_USE_DEBUG)
511         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
512 #endif
513         else {
514           if (mat->was_assembled) {
515             if (!aij->colmap) {
516               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
517             }
518 #if defined (PETSC_USE_CTABLE)
519             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520 	    col--;
521 #else
522             col = aij->colmap[in[j]] - 1;
523 #endif
524             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526               col =  in[j];
527               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
528               B = aij->B;
529               b = (Mat_SeqAIJ*)B->data;
530               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
531               rp2      = bj + bi[row];
532               ap2      = ba + bi[row];
533               rmax2    = bimax[row];
534               nrow2    = bilen[row];
535               low2     = 0;
536               high2    = nrow2;
537               bm       = aij->B->rmap->n;
538               ba = b->a;
539             }
540           } else col = in[j];
541           nonew = b->nonew;
542           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
543         }
544       }
545     } else {
546       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
547       if (!aij->donotstash) {
548         mat->assembled = PETSC_FALSE;
549         if (roworiented) {
550           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
551         } else {
552           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
553         }
554       }
555     }
556   }
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatGetValues_MPIAIJ"
562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
563 {
564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
565   PetscErrorCode ierr;
566   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
567   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
568 
569   PetscFunctionBegin;
570   for (i=0; i<m; i++) {
571     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
572     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
573     if (idxm[i] >= rstart && idxm[i] < rend) {
574       row = idxm[i] - rstart;
575       for (j=0; j<n; j++) {
576         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
577         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
578         if (idxn[j] >= cstart && idxn[j] < cend){
579           col = idxn[j] - cstart;
580           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581         } else {
582           if (!aij->colmap) {
583             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
584           }
585 #if defined (PETSC_USE_CTABLE)
586           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
587           col --;
588 #else
589           col = aij->colmap[idxn[j]] - 1;
590 #endif
591           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
592           else {
593             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
594           }
595         }
596       }
597     } else {
598       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
599     }
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
605 
606 #undef __FUNCT__
607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
609 {
610   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
611   PetscErrorCode ierr;
612   PetscInt       nstash,reallocs;
613   InsertMode     addv;
614 
615   PetscFunctionBegin;
616   if (aij->donotstash || mat->nooffprocentries) {
617     PetscFunctionReturn(0);
618   }
619 
620   /* make sure all processors are either in INSERTMODE or ADDMODE */
621   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
622   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
623   mat->insertmode = addv; /* in case this processor had no cache */
624 
625   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
626   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
627   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
634 {
635   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
636   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
637   PetscErrorCode ierr;
638   PetscMPIInt    n;
639   PetscInt       i,j,rstart,ncols,flg;
640   PetscInt       *row,*col;
641   PetscBool      other_disassembled;
642   PetscScalar    *val;
643   InsertMode     addv = mat->insertmode;
644 
645   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
646   PetscFunctionBegin;
647   if (!aij->donotstash && !mat->nooffprocentries) {
648     while (1) {
649       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
650       if (!flg) break;
651 
652       for (i=0; i<n;) {
653         /* Now identify the consecutive vals belonging to the same row */
654         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
655         if (j < n) ncols = j-i;
656         else       ncols = n-i;
657         /* Now assemble all these values with a single function call */
658         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
659         i = j;
660       }
661     }
662     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
663   }
664   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
665   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
666 
667   /* determine if any processor has disassembled, if so we must
668      also disassemble ourselves, in order that we may reassemble. */
669   /*
670      if nonzero structure of submatrix B cannot change then we know that
671      no processor disassembled thus we can skip this stuff
672   */
673   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
674     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
675     if (mat->was_assembled && !other_disassembled) {
676       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
677     }
678   }
679   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
680     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
681   }
682   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
683   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
684   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
685   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
686 
687   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
688   aij->rowvalues = 0;
689 
690   /* used by MatAXPY() */
691   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
692   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
693 
694   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
695   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
696   PetscFunctionReturn(0);
697 }
698 
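/*
   The stash/scatter machinery in MatAssemblyBegin_MPIAIJ() and
   MatAssemblyEnd_MPIAIJ() is exercised by the usual assembly pattern
   (a sketch; row, col and val may refer to entries owned by another process):

      ierr = MatSetValues(A,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Off-process entries are stashed in MatSetValues_MPIAIJ(), communicated in
   MatAssemblyBegin_MPIAIJ(), and inserted into the local A and B blocks in
   MatAssemblyEnd_MPIAIJ().
*/
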
699 #undef __FUNCT__
700 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
702 {
703   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
704   PetscErrorCode ierr;
705 
706   PetscFunctionBegin;
707   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
708   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatZeroRows_MPIAIJ"
714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
715 {
716   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
717   PetscErrorCode    ierr;
718   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
719   PetscInt          i,*owners = A->rmap->range;
720   PetscInt          *nprocs,j,idx,nsends,row;
721   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
722   PetscInt          *rvalues,count,base,slen,*source;
723   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
724   MPI_Comm          comm = ((PetscObject)A)->comm;
725   MPI_Request       *send_waits,*recv_waits;
726   MPI_Status        recv_status,*send_status;
727   const PetscScalar *xx;
728   PetscScalar       *bb;
729 #if defined(PETSC_DEBUG)
730   PetscBool      found = PETSC_FALSE;
731 #endif
732 
733   PetscFunctionBegin;
734   /*  first count number of contributors to each processor */
735   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
736   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
737   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
738   j = 0;
739   for (i=0; i<N; i++) {
740     if (lastidx > (idx = rows[i])) j = 0;
741     lastidx = idx;
742     for (; j<size; j++) {
743       if (idx >= owners[j] && idx < owners[j+1]) {
744         nprocs[2*j]++;
745         nprocs[2*j+1] = 1;
746         owner[i] = j;
747 #if defined(PETSC_DEBUG)
748         found = PETSC_TRUE;
749 #endif
750         break;
751       }
752     }
753 #if defined(PETSC_DEBUG)
754     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
755     found = PETSC_FALSE;
756 #endif
757   }
758   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
759 
760   if (A->nooffproczerorows) {
761     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
762     nrecvs = nsends;
763     nmax   = N;
764   } else {
765     /* inform other processors of number of messages and max length*/
766     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
767   }
768 
769   /* post receives:   */
770   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
771   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
772   for (i=0; i<nrecvs; i++) {
773     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
774   }
775 
776   /* do sends:
777       1) starts[i] gives the starting index in svalues for stuff going to
778          the ith processor
779   */
780   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
781   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
782   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
783   starts[0] = 0;
784   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
785   for (i=0; i<N; i++) {
786     svalues[starts[owner[i]]++] = rows[i];
787   }
788 
789   starts[0] = 0;
790   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
791   count = 0;
792   for (i=0; i<size; i++) {
793     if (nprocs[2*i+1]) {
794       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
795     }
796   }
797   ierr = PetscFree(starts);CHKERRQ(ierr);
798 
799   base = owners[rank];
800 
801   /*  wait on receives */
802   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
803   count  = nrecvs; slen = 0;
804   while (count) {
805     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
806     /* unpack receives into our local space */
807     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
808     source[imdex]  = recv_status.MPI_SOURCE;
809     lens[imdex]    = n;
810     slen          += n;
811     count--;
812   }
813   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
814 
815   /* move the data into the send scatter */
816   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
817   count = 0;
818   for (i=0; i<nrecvs; i++) {
819     values = rvalues + i*nmax;
820     for (j=0; j<lens[i]; j++) {
821       lrows[count++] = values[j] - base;
822     }
823   }
824   ierr = PetscFree(rvalues);CHKERRQ(ierr);
825   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
826   ierr = PetscFree(owner);CHKERRQ(ierr);
827   ierr = PetscFree(nprocs);CHKERRQ(ierr);
828 
829   /* fix right hand side if needed */
830   if (x && b) {
831     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
833     for (i=0; i<slen; i++) {
834       bb[lrows[i]] = diag*xx[lrows[i]];
835     }
836     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
837     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
838   }
839   /*
840         Zero the required rows. If the "diagonal block" of the matrix
841      is square and the user wishes to set the diagonal we use separate
842      code so that MatSetValues() is not called for each diagonal entry, which would
843      allocate new memory, trigger lots of mallocs, and slow things down.
844 
845   */
846   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
847   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
848   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
849     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
850   } else if (diag != 0.0) {
851     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
852     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
853       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
855     }
856     for (i = 0; i < slen; i++) {
857       row  = lrows[i] + rstart;
858       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
859     }
860     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
861     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
862   } else {
863     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
864   }
865   ierr = PetscFree(lrows);CHKERRQ(ierr);
866 
867   /* wait on sends */
868   if (nsends) {
869     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
870     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
871     ierr = PetscFree(send_status);CHKERRQ(ierr);
872   }
873   ierr = PetscFree(send_waits);CHKERRQ(ierr);
874   ierr = PetscFree(svalues);CHKERRQ(ierr);
875   PetscFunctionReturn(0);
876 }
877 
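/*
   Usage sketch for MatZeroRows(), which lands in MatZeroRows_MPIAIJ() above
   (assumes A is an assembled MPIAIJ matrix of a linear system with solution x
   and right hand side b; the listed rows may be owned by any process):

      PetscInt rows[2] = {0,5};
      ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);

   Rows 0 and 5 are zeroed, their diagonal entries are set to 1.0, and for each
   zeroed row the right hand side is replaced by b[row] = 1.0*x[row].
*/
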
878 #undef __FUNCT__
879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
881 {
882   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
883   PetscErrorCode    ierr;
884   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
885   PetscInt          i,*owners = A->rmap->range;
886   PetscInt          *nprocs,j,idx,nsends;
887   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
888   PetscInt          *rvalues,count,base,slen,*source;
889   PetscInt          *lens,*lrows,*values,m;
890   MPI_Comm          comm = ((PetscObject)A)->comm;
891   MPI_Request       *send_waits,*recv_waits;
892   MPI_Status        recv_status,*send_status;
893   const PetscScalar *xx;
894   PetscScalar       *bb,*mask;
895   Vec               xmask,lmask;
896   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
897   const PetscInt    *aj, *ii,*ridx;
898   PetscScalar       *aa;
899 #if defined(PETSC_DEBUG)
900   PetscBool         found = PETSC_FALSE;
901 #endif
902 
903   PetscFunctionBegin;
904   /*  first count number of contributors to each processor */
905   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
906   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
907   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
908   j = 0;
909   for (i=0; i<N; i++) {
910     if (lastidx > (idx = rows[i])) j = 0;
911     lastidx = idx;
912     for (; j<size; j++) {
913       if (idx >= owners[j] && idx < owners[j+1]) {
914         nprocs[2*j]++;
915         nprocs[2*j+1] = 1;
916         owner[i] = j;
917 #if defined(PETSC_DEBUG)
918         found = PETSC_TRUE;
919 #endif
920         break;
921       }
922     }
923 #if defined(PETSC_DEBUG)
924     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
925     found = PETSC_FALSE;
926 #endif
927   }
928   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
929 
930   /* inform other processors of number of messages and max length*/
931   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
932 
933   /* post receives:   */
934   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
935   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
936   for (i=0; i<nrecvs; i++) {
937     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
938   }
939 
940   /* do sends:
941       1) starts[i] gives the starting index in svalues for stuff going to
942          the ith processor
943   */
944   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
945   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
946   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
947   starts[0] = 0;
948   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
949   for (i=0; i<N; i++) {
950     svalues[starts[owner[i]]++] = rows[i];
951   }
952 
953   starts[0] = 0;
954   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
955   count = 0;
956   for (i=0; i<size; i++) {
957     if (nprocs[2*i+1]) {
958       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
959     }
960   }
961   ierr = PetscFree(starts);CHKERRQ(ierr);
962 
963   base = owners[rank];
964 
965   /*  wait on receives */
966   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
967   count  = nrecvs; slen = 0;
968   while (count) {
969     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
970     /* unpack receives into our local space */
971     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
972     source[imdex]  = recv_status.MPI_SOURCE;
973     lens[imdex]    = n;
974     slen          += n;
975     count--;
976   }
977   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
978 
979   /* move the data into the send scatter */
980   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
981   count = 0;
982   for (i=0; i<nrecvs; i++) {
983     values = rvalues + i*nmax;
984     for (j=0; j<lens[i]; j++) {
985       lrows[count++] = values[j] - base;
986     }
987   }
988   ierr = PetscFree(rvalues);CHKERRQ(ierr);
989   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
990   ierr = PetscFree(owner);CHKERRQ(ierr);
991   ierr = PetscFree(nprocs);CHKERRQ(ierr);
992   /* lrows are the local rows to be zeroed, slen is the number of local rows */
993 
994   /* zero diagonal part of matrix */
995   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
996 
997   /* handle off diagonal part of matrix */
998   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
999   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1000   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1001   for (i=0; i<slen; i++) {
1002     bb[lrows[i]] = 1;
1003   }
1004   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1005   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1008   if (x) {
1009     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1011     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1012     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1013   }
1014   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1015 
1016   /* remove zeroed rows of off diagonal matrix */
1017   ii = aij->i;
1018   for (i=0; i<slen; i++) {
1019     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1020   }
1021 
1022   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1023   if (aij->compressedrow.use){
1024     m    = aij->compressedrow.nrows;
1025     ii   = aij->compressedrow.i;
1026     ridx = aij->compressedrow.rindex;
1027     for (i=0; i<m; i++){
1028       n   = ii[i+1] - ii[i];
1029       aj  = aij->j + ii[i];
1030       aa  = aij->a + ii[i];
1031 
1032       for (j=0; j<n; j++) {
1033         if (PetscAbsScalar(mask[*aj])) {
1034           if (b) bb[*ridx] -= *aa*xx[*aj];
1035           *aa        = 0.0;
1036         }
1037         aa++;
1038         aj++;
1039       }
1040       ridx++;
1041     }
1042   } else { /* do not use compressed row format */
1043     m = l->B->rmap->n;
1044     for (i=0; i<m; i++) {
1045       n   = ii[i+1] - ii[i];
1046       aj  = aij->j + ii[i];
1047       aa  = aij->a + ii[i];
1048       for (j=0; j<n; j++) {
1049         if (PetscAbsScalar(mask[*aj])) {
1050           if (b) bb[i] -= *aa*xx[*aj];
1051           *aa    = 0.0;
1052         }
1053         aa++;
1054         aj++;
1055       }
1056     }
1057   }
1058   if (x) {
1059     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1060     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1061   }
1062   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1063   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1064   ierr = PetscFree(lrows);CHKERRQ(ierr);
1065 
1066   /* wait on sends */
1067   if (nsends) {
1068     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1069     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1070     ierr = PetscFree(send_status);CHKERRQ(ierr);
1071   }
1072   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1073   ierr = PetscFree(svalues);CHKERRQ(ierr);
1074 
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatMult_MPIAIJ"
1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1081 {
1082   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1083   PetscErrorCode ierr;
1084   PetscInt       nt;
1085 
1086   PetscFunctionBegin;
1087   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1088   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1089   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1091   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1092   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1111 {
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1118   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1126 {
1127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1128   PetscErrorCode ierr;
1129   PetscBool      merged;
1130 
1131   PetscFunctionBegin;
1132   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1133   /* do nondiagonal part */
1134   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1135   if (!merged) {
1136     /* send it on its way */
1137     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138     /* do local part */
1139     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1140     /* receive remote parts: note this assumes the values are not actually */
1141     /* added into yy until the VecScatterEnd() on the next line */
1142     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   } else {
1144     /* do local part */
1145     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1146     /* send it on its way */
1147     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148     /* values actually were received in the Begin() but we need to call this nop */
1149     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 EXTERN_C_BEGIN
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1157 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1158 {
1159   MPI_Comm       comm;
1160   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1161   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1162   IS             Me,Notme;
1163   PetscErrorCode ierr;
1164   PetscInt       M,N,first,last,*notme,i;
1165   PetscMPIInt    size;
1166 
1167   PetscFunctionBegin;
1168 
1169   /* Easy test: symmetric diagonal block */
1170   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1171   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1172   if (!*f) PetscFunctionReturn(0);
1173   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1174   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1175   if (size == 1) PetscFunctionReturn(0);
1176 
1177   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
1178   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1179   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1180   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1181   for (i=0; i<first; i++) notme[i] = i;
1182   for (i=last; i<M; i++) notme[i-last+first] = i;
1183   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1184   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1185   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1186   Aoff = Aoffs[0];
1187   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1188   Boff = Boffs[0];
1189   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1190   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1191   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1192   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1193   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1194   ierr = PetscFree(notme);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 EXTERN_C_END
1198 
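/*
   Usage sketch for MatIsTranspose(), which calls MatIsTranspose_MPIAIJ() above
   (assumes A and B are assembled MPIAIJ matrices with transposed layouts):

      PetscBool flg;
      ierr = MatIsTranspose(A,B,1.e-10,&flg);CHKERRQ(ierr);
      if (flg) {ierr = PetscPrintf(PETSC_COMM_WORLD,"B is the transpose of A\n");CHKERRQ(ierr);}
*/
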
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1202 {
1203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1204   PetscErrorCode ierr;
1205 
1206   PetscFunctionBegin;
1207   /* do nondiagonal part */
1208   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1209   /* send it on its way */
1210   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1211   /* do local part */
1212   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1213   /* receive remote parts */
1214   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 /*
1219   This only works correctly for square matrices where the subblock A->A is the
1220    diagonal block
1221 */
1222 #undef __FUNCT__
1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1225 {
1226   PetscErrorCode ierr;
1227   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1228 
1229   PetscFunctionBegin;
1230   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1231   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1232   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatScale_MPIAIJ"
1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1239 {
1240   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1245   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatDestroy_MPIAIJ"
1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1252 {
1253   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1254   PetscErrorCode ierr;
1255 
1256   PetscFunctionBegin;
1257 #if defined(PETSC_USE_LOG)
1258   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1259 #endif
1260   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1261   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1263   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1264 #if defined (PETSC_USE_CTABLE)
1265   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1266 #else
1267   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1268 #endif
1269   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1270   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1271   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1272   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1273   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1274   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1275 
1276   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1291 {
1292   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1293   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1294   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1295   PetscErrorCode    ierr;
1296   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1297   int               fd;
1298   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1299   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1300   PetscScalar       *column_values;
1301   PetscInt          message_count,flowcontrolcount;
1302   FILE              *file;
1303 
1304   PetscFunctionBegin;
1305   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1306   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1307   nz   = A->nz + B->nz;
1308   if (!rank) {
1309     header[0] = MAT_FILE_CLASSID;
1310     header[1] = mat->rmap->N;
1311     header[2] = mat->cmap->N;
1312     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1313     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1314     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1315     /* get largest number of rows any processor has */
1316     rlen = mat->rmap->n;
1317     range = mat->rmap->range;
1318     for (i=1; i<size; i++) {
1319       rlen = PetscMax(rlen,range[i+1] - range[i]);
1320     }
1321   } else {
1322     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1323     rlen = mat->rmap->n;
1324   }
1325 
1326   /* load up the local row counts */
1327   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1328   for (i=0; i<mat->rmap->n; i++) {
1329     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1330   }
1331 
1332   /* store the row lengths to the file */
1333   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1334   if (!rank) {
1335     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1336     for (i=1; i<size; i++) {
1337       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1338       rlen = range[i+1] - range[i];
1339       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1341     }
1342     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1343   } else {
1344     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1345     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1346     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1347   }
1348   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1349 
1350   /* load up the local column indices */
1351   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
1352   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1353   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1354   cnt  = 0;
1355   for (i=0; i<mat->rmap->n; i++) {
1356     for (j=B->i[i]; j<B->i[i+1]; j++) {
1357       if ( (col = garray[B->j[j]]) > cstart) break;
1358       column_indices[cnt++] = col;
1359     }
1360     for (k=A->i[i]; k<A->i[i+1]; k++) {
1361       column_indices[cnt++] = A->j[k] + cstart;
1362     }
1363     for (; j<B->i[i+1]; j++) {
1364       column_indices[cnt++] = garray[B->j[j]];
1365     }
1366   }
1367   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1368 
1369   /* store the column indices to the file */
1370   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1371   if (!rank) {
1372     MPI_Status status;
1373     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1374     for (i=1; i<size; i++) {
1375       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1376       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1377       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1378       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1380     }
1381     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1382   } else {
1383     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1384     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1385     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1386     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1387   }
1388   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1389 
1390   /* load up the local column values */
1391   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1392   cnt  = 0;
1393   for (i=0; i<mat->rmap->n; i++) {
1394     for (j=B->i[i]; j<B->i[i+1]; j++) {
1395       if ( garray[B->j[j]] > cstart) break;
1396       column_values[cnt++] = B->a[j];
1397     }
1398     for (k=A->i[i]; k<A->i[i+1]; k++) {
1399       column_values[cnt++] = A->a[k];
1400     }
1401     for (; j<B->i[i+1]; j++) {
1402       column_values[cnt++] = B->a[j];
1403     }
1404   }
1405   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1406 
1407   /* store the column values to the file */
1408   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1409   if (!rank) {
1410     MPI_Status status;
1411     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1412     for (i=1; i<size; i++) {
1413       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1414       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1415       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1416       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1417       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1418     }
1419     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1420   } else {
1421     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1422     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1423     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1424     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1425   }
1426   ierr = PetscFree(column_values);CHKERRQ(ierr);
1427 
1428   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1429   if (file) {
1430     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1431   }
1432 
1433   PetscFunctionReturn(0);
1434 }
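
/*
   A minimal usage sketch of how the binary path above is typically reached, assuming a
   parallel MATMPIAIJ matrix mat; the file name "matrix.dat" is only illustrative:

       PetscViewer viewer;
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
       ierr = MatView(mat,viewer);CHKERRQ(ierr);              (dispatches to MatView_MPIAIJ_Binary() on more than one process)
       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   A file written this way can later be read back with MatLoad().
*/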
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1438 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1439 {
1440   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1441   PetscErrorCode    ierr;
1442   PetscMPIInt       rank = aij->rank,size = aij->size;
1443   PetscBool         isdraw,iascii,isbinary;
1444   PetscViewer       sviewer;
1445   PetscViewerFormat format;
1446 
1447   PetscFunctionBegin;
1448   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1449   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1450   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1451   if (iascii) {
1452     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1453     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1454       MatInfo    info;
1455       PetscInt   *inodes=PETSC_NULL;
1456 
1457       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1458       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1459       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,&inodes,PETSC_NULL);CHKERRQ(ierr);
1460       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1461       if (!inodes) {
1462         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1463                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1464       } else {
1465         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1466                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1467       }
1468       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1469       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1470       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1471       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1472       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1473       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1474       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1475       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1476       PetscFunctionReturn(0);
1477     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1478       PetscInt   inodecount,inodelimit,*inodes;
1479       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1480       if (inodes) {
1481         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1482       } else {
1483         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1484       }
1485       PetscFunctionReturn(0);
1486     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1487       PetscFunctionReturn(0);
1488     }
1489   } else if (isbinary) {
1490     if (size == 1) {
1491       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1492       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1493     } else {
1494       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1495     }
1496     PetscFunctionReturn(0);
1497   } else if (isdraw) {
1498     PetscDraw  draw;
1499     PetscBool  isnull;
1500     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1501     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1502   }
1503 
1504   if (size == 1) {
1505     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1506     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1507   } else {
1508     /* assemble the entire matrix onto first processor. */
1509     Mat         A;
1510     Mat_SeqAIJ  *Aloc;
1511     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1512     MatScalar   *a;
1513 
1514     if (mat->rmap->N > 1024) {
1515       PetscBool  flg = PETSC_FALSE;
1516 
1517       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1518       if (!flg) {
1519         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1520       }
1521     }
1522 
1523     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1524     if (!rank) {
1525       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1526     } else {
1527       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1528     }
1529     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1530     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1531     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1532     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1533     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1534 
1535     /* copy over the A part */
1536     Aloc = (Mat_SeqAIJ*)aij->A->data;
1537     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1538     row = mat->rmap->rstart;
1539     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1540     for (i=0; i<m; i++) {
1541       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1542       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1543     }
1544     aj = Aloc->j;
1545     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1546 
1547     /* copy over the B part */
1548     Aloc = (Mat_SeqAIJ*)aij->B->data;
1549     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1550     row  = mat->rmap->rstart;
1551     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1552     ct   = cols;
1553     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1554     for (i=0; i<m; i++) {
1555       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1556       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1557     }
1558     ierr = PetscFree(ct);CHKERRQ(ierr);
1559     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1560     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1561     /*
1562        Everyone has to call to draw the matrix since the graphics waits are
1563        synchronized across all processors that share the PetscDraw object
1564     */
1565     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1566     if (!rank) {
1567       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1568       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1569       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1570       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1571     }
1572     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1573     ierr = MatDestroy(&A);CHKERRQ(ierr);
1574   }
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatView_MPIAIJ"
1580 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1581 {
1582   PetscErrorCode ierr;
1583   PetscBool      iascii,isdraw,issocket,isbinary;
1584 
1585   PetscFunctionBegin;
1586   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1587   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1588   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1589   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1590   if (iascii || isdraw || isbinary || issocket) {
1591     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1592   } else {
1593     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1594   }
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatSOR_MPIAIJ"
1600 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1601 {
1602   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1603   PetscErrorCode ierr;
1604   Vec            bb1 = 0;
1605   PetscBool      hasop;
1606 
1607   PetscFunctionBegin;
1608   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1609     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1610   }
1611 
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1618     if (flag & SOR_ZERO_INITIAL_GUESS) {
1619       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1620       its--;
1621     }
1622 
1623     while (its--) {
1624       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1626 
1627       /* update rhs: bb1 = bb - B*x */
1628       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1629       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1630 
1631       /* local sweep */
1632       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1633     }
1634   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1635     if (flag & SOR_ZERO_INITIAL_GUESS) {
1636       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1637       its--;
1638     }
1639     while (its--) {
1640       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1641       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1642 
1643       /* update rhs: bb1 = bb - B*x */
1644       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1645       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1646 
1647       /* local sweep */
1648       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1649     }
1650   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1651     if (flag & SOR_ZERO_INITIAL_GUESS) {
1652       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1653       its--;
1654     }
1655     while (its--) {
1656       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1657       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1658 
1659       /* update rhs: bb1 = bb - B*x */
1660       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1661       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1662 
1663       /* local sweep */
1664       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1665     }
1666   }  else if (flag & SOR_EISENSTAT) {
1667     Vec         xx1;
1668 
1669     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1670     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1671 
1672     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1673     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1674     if (!mat->diag) {
1675       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1676       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1677     }
1678     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1679     if (hasop) {
1680       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1681     } else {
1682       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1683     }
1684     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1685 
1686     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1687 
1688     /* local sweep */
1689     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1690     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1691     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1692   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1693 
1694   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
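
/*
   A minimal sketch of how the local SOR sweeps above are typically invoked, assuming an
   assembled MPIAIJ matrix A and conforming vectors b and x; the flag combination mirrors
   the cases handled in MatSOR_MPIAIJ():

       ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   The same path is exercised through -pc_type sor in a KSP solve: each process applies SOR
   to its diagonal block while the off-diagonal contribution is folded into the right-hand
   side (bb1 = bb - B*x) as done above.
*/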
1697 
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatPermute_MPIAIJ"
1700 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1701 {
1702   MPI_Comm       comm;
1703   PetscInt       first,local_rowsize,local_colsize;
1704   const PetscInt *rows;
1705   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1706   PetscErrorCode ierr;
1707 
1708   PetscFunctionBegin;
1709   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1710   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1711   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1712   /* collect the global row permutation and invert it */
1713   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1714   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1715   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1716   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1717   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1718   /* get the local target indices */
1719   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1720   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1721   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1722   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1723   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1724   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1725   /* the column permutation is so much easier;
1726      make a local version of 'colp' and invert it */
1727   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1728   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1729   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1730   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1731   /* now we just get the submatrix */
1732   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1733   /* clean up */
1734   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1735   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1736   PetscFunctionReturn(0);
1737 }
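
/*
   A minimal sketch, assuming rowp and colp are index sets describing row and column
   permutations (serial index sets are accepted, since they are made collective with
   ISOnComm() above):

       Mat B;
       ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
       ...
       ierr = MatDestroy(&B);CHKERRQ(ierr);
*/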
1738 
1739 #undef __FUNCT__
1740 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1741 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1742 {
1743   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1744   Mat            A = mat->A,B = mat->B;
1745   PetscErrorCode ierr;
1746   PetscReal      isend[5],irecv[5];
1747 
1748   PetscFunctionBegin;
1749   info->block_size     = 1.0;
1750   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1751   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1752   isend[3] = info->memory;  isend[4] = info->mallocs;
1753   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1754   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1755   isend[3] += info->memory;  isend[4] += info->mallocs;
1756   if (flag == MAT_LOCAL) {
1757     info->nz_used      = isend[0];
1758     info->nz_allocated = isend[1];
1759     info->nz_unneeded  = isend[2];
1760     info->memory       = isend[3];
1761     info->mallocs      = isend[4];
1762   } else if (flag == MAT_GLOBAL_MAX) {
1763     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1764     info->nz_used      = irecv[0];
1765     info->nz_allocated = irecv[1];
1766     info->nz_unneeded  = irecv[2];
1767     info->memory       = irecv[3];
1768     info->mallocs      = irecv[4];
1769   } else if (flag == MAT_GLOBAL_SUM) {
1770     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1771     info->nz_used      = irecv[0];
1772     info->nz_allocated = irecv[1];
1773     info->nz_unneeded  = irecv[2];
1774     info->memory       = irecv[3];
1775     info->mallocs      = irecv[4];
1776   }
1777   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1778   info->fill_ratio_needed = 0;
1779   info->factor_mallocs    = 0;
1780 
1781   PetscFunctionReturn(0);
1782 }
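
/*
   A minimal sketch of querying an assembled MPIAIJ matrix A; MAT_LOCAL returns this
   process's counts, while MAT_GLOBAL_SUM and MAT_GLOBAL_MAX reduce over the communicator
   as implemented above:

       MatInfo info;
       ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",
                          info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/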
1783 
1784 #undef __FUNCT__
1785 #define __FUNCT__ "MatSetOption_MPIAIJ"
1786 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1787 {
1788   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1789   PetscErrorCode ierr;
1790 
1791   PetscFunctionBegin;
1792   switch (op) {
1793   case MAT_NEW_NONZERO_LOCATIONS:
1794   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1795   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1796   case MAT_KEEP_NONZERO_PATTERN:
1797   case MAT_NEW_NONZERO_LOCATION_ERR:
1798   case MAT_USE_INODES:
1799   case MAT_IGNORE_ZERO_ENTRIES:
1800     MatCheckPreallocated(A,1);
1801     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1802     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1803     break;
1804   case MAT_ROW_ORIENTED:
1805     a->roworiented = flg;
1806     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1807     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1808     break;
1809   case MAT_NEW_DIAGONALS:
1810     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1811     break;
1812   case MAT_IGNORE_OFF_PROC_ENTRIES:
1813     a->donotstash = flg;
1814     break;
1815   case MAT_SPD:
1816     A->spd_set                         = PETSC_TRUE;
1817     A->spd                             = flg;
1818     if (flg) {
1819       A->symmetric                     = PETSC_TRUE;
1820       A->structurally_symmetric        = PETSC_TRUE;
1821       A->symmetric_set                 = PETSC_TRUE;
1822       A->structurally_symmetric_set    = PETSC_TRUE;
1823     }
1824     break;
1825   case MAT_SYMMETRIC:
1826     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1827     break;
1828   case MAT_STRUCTURALLY_SYMMETRIC:
1829     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1830     break;
1831   case MAT_HERMITIAN:
1832     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1833     break;
1834   case MAT_SYMMETRY_ETERNAL:
1835     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1836     break;
1837 #if defined(PETSC_HAVE_TXPETSCGPU)
1838     /* GPU-specific MatOptions are listed here so that they do not fall through
1839        to the unknown-option error below; MatSetOption_MPIAIJCUSP/CUSPARSE handle
1840        them for the GPU matrix subclasses. */
1841   case MAT_DIAGBLOCK_CSR:
1842   case MAT_OFFDIAGBLOCK_CSR:
1843   case MAT_CSR:
1844   case MAT_DIAGBLOCK_DIA:
1845   case MAT_OFFDIAGBLOCK_DIA:
1846   case MAT_DIA:
1847   case MAT_DIAGBLOCK_ELL:
1848   case MAT_OFFDIAGBLOCK_ELL:
1849   case MAT_ELL:
1850   case MAT_DIAGBLOCK_HYB:
1851   case MAT_OFFDIAGBLOCK_HYB:
1852   case MAT_HYB:
1853     /* Not an error because MatSetOption_MPIAIJCUSP/CUSPARSE handle these options */
1854     break;
1855 #endif
1856   default:
1857     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1858   }
1859   PetscFunctionReturn(0);
1860 }
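
/*
   A minimal sketch, assuming a preallocated MPIAIJ matrix A; options that only make sense
   on the sequential blocks are forwarded to a->A and a->B as shown above:

       ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
       ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
*/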
1861 
1862 #undef __FUNCT__
1863 #define __FUNCT__ "MatGetRow_MPIAIJ"
1864 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1865 {
1866   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1867   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1868   PetscErrorCode ierr;
1869   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1870   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1871   PetscInt       *cmap,*idx_p;
1872 
1873   PetscFunctionBegin;
1874   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1875   mat->getrowactive = PETSC_TRUE;
1876 
1877   if (!mat->rowvalues && (idx || v)) {
1878     /*
1879         allocate enough space to hold information from the longest row.
1880     */
1881     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1882     PetscInt   max = 1,tmp;
1883     for (i=0; i<matin->rmap->n; i++) {
1884       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1885       if (max < tmp) { max = tmp; }
1886     }
1887     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1888   }
1889 
1890   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1891   lrow = row - rstart;
1892 
1893   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1894   if (!v)   {pvA = 0; pvB = 0;}
1895   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1896   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1897   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1898   nztot = nzA + nzB;
1899 
1900   cmap  = mat->garray;
1901   if (v  || idx) {
1902     if (nztot) {
1903       /* Sort by increasing column numbers, assuming A and B already sorted */
1904       PetscInt imark = -1;
1905       if (v) {
1906         *v = v_p = mat->rowvalues;
1907         for (i=0; i<nzB; i++) {
1908           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1909           else break;
1910         }
1911         imark = i;
1912         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1913         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1914       }
1915       if (idx) {
1916         *idx = idx_p = mat->rowindices;
1917         if (imark > -1) {
1918           for (i=0; i<imark; i++) {
1919             idx_p[i] = cmap[cworkB[i]];
1920           }
1921         } else {
1922           for (i=0; i<nzB; i++) {
1923             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1924             else break;
1925           }
1926           imark = i;
1927         }
1928         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1929         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1930       }
1931     } else {
1932       if (idx) *idx = 0;
1933       if (v)   *v   = 0;
1934     }
1935   }
1936   *nz = nztot;
1937   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1938   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1944 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1945 {
1946   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1947 
1948   PetscFunctionBegin;
1949   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1950   aij->getrowactive = PETSC_FALSE;
1951   PetscFunctionReturn(0);
1952 }
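
/*
   A minimal sketch of the intended calling sequence for the two routines above, assuming
   an assembled MPIAIJ matrix A; only locally owned rows may be requested, and each
   MatGetRow() must be paired with MatRestoreRow() before the next row is requested:

       PetscInt          row,rstart,rend,ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         (use ncols, cols[], vals[] here)
         ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       }
*/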
1953 
1954 #undef __FUNCT__
1955 #define __FUNCT__ "MatNorm_MPIAIJ"
1956 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1957 {
1958   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1959   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1960   PetscErrorCode ierr;
1961   PetscInt       i,j,cstart = mat->cmap->rstart;
1962   PetscReal      sum = 0.0;
1963   MatScalar      *v;
1964 
1965   PetscFunctionBegin;
1966   if (aij->size == 1) {
1967     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1968   } else {
1969     if (type == NORM_FROBENIUS) {
1970       v = amat->a;
1971       for (i=0; i<amat->nz; i++) {
1972 #if defined(PETSC_USE_COMPLEX)
1973         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1974 #else
1975         sum += (*v)*(*v); v++;
1976 #endif
1977       }
1978       v = bmat->a;
1979       for (i=0; i<bmat->nz; i++) {
1980 #if defined(PETSC_USE_COMPLEX)
1981         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1982 #else
1983         sum += (*v)*(*v); v++;
1984 #endif
1985       }
1986       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1987       *norm = PetscSqrtReal(*norm);
1988     } else if (type == NORM_1) { /* max column norm */
1989       PetscReal *tmp,*tmp2;
1990       PetscInt  *jj,*garray = aij->garray;
1991       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1992       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1993       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1994       *norm = 0.0;
1995       v = amat->a; jj = amat->j;
1996       for (j=0; j<amat->nz; j++) {
1997         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1998       }
1999       v = bmat->a; jj = bmat->j;
2000       for (j=0; j<bmat->nz; j++) {
2001         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2002       }
2003       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2004       for (j=0; j<mat->cmap->N; j++) {
2005         if (tmp2[j] > *norm) *norm = tmp2[j];
2006       }
2007       ierr = PetscFree(tmp);CHKERRQ(ierr);
2008       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2009     } else if (type == NORM_INFINITY) { /* max row norm */
2010       PetscReal ntemp = 0.0;
2011       for (j=0; j<aij->A->rmap->n; j++) {
2012         v = amat->a + amat->i[j];
2013         sum = 0.0;
2014         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2015           sum += PetscAbsScalar(*v); v++;
2016         }
2017         v = bmat->a + bmat->i[j];
2018         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2019           sum += PetscAbsScalar(*v); v++;
2020         }
2021         if (sum > ntemp) ntemp = sum;
2022       }
2023       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2024     } else {
2025       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for the two norm (NORM_2) of a parallel matrix");
2026     }
2027   }
2028   PetscFunctionReturn(0);
2029 }
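
/*
   A minimal sketch, assuming an assembled MPIAIJ matrix A; note from the code above that
   NORM_2 is not supported for parallel AIJ matrices:

       PetscReal nrm1,nrmf,nrminf;
       ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);          (max column sum)
       ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
       ierr = MatNorm(A,NORM_INFINITY,&nrminf);CHKERRQ(ierr); (max row sum)
*/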
2030 
2031 #undef __FUNCT__
2032 #define __FUNCT__ "MatTranspose_MPIAIJ"
2033 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2034 {
2035   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2036   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2037   PetscErrorCode ierr;
2038   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2039   PetscInt       cstart=A->cmap->rstart,ncol;
2040   Mat            B;
2041   MatScalar      *array;
2042 
2043   PetscFunctionBegin;
2044   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2045 
2046   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2047   ai = Aloc->i; aj = Aloc->j;
2048   bi = Bloc->i; bj = Bloc->j;
2049   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2050     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2051     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2052     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2053     for (i=0; i<ai[ma]; i++){
2054       d_nnz[aj[i]] ++;
2055       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2056     }
2057 
2058     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2059     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2060     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2061     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2062     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2063     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2064     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2065   } else {
2066     B = *matout;
2067   }
2068 
2069   /* copy over the A part */
2070   array = Aloc->a;
2071   row = A->rmap->rstart;
2072   for (i=0; i<ma; i++) {
2073     ncol = ai[i+1]-ai[i];
2074     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2075     row++; array += ncol; aj += ncol;
2076   }
2077   aj = Aloc->j;
2078   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2079 
2080   /* copy over the B part */
2081   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2082   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2083   array = Bloc->a;
2084   row = A->rmap->rstart;
2085   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2086   cols_tmp = cols;
2087   for (i=0; i<mb; i++) {
2088     ncol = bi[i+1]-bi[i];
2089     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2090     row++; array += ncol; cols_tmp += ncol;
2091   }
2092   ierr = PetscFree(cols);CHKERRQ(ierr);
2093 
2094   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2095   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2096   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2097     *matout = B;
2098   } else {
2099     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2100   }
2101   PetscFunctionReturn(0);
2102 }
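
/*
   A minimal sketch, assuming an assembled MPIAIJ matrix A:

       Mat At;
       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
       ...
       ierr = MatDestroy(&At);CHKERRQ(ierr);

   In-place transposition, MatTranspose(A,MAT_REUSE_MATRIX,&A), is only permitted for
   square matrices, as checked at the top of the routine above.
*/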
2103 
2104 #undef __FUNCT__
2105 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2106 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2107 {
2108   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2109   Mat            a = aij->A,b = aij->B;
2110   PetscErrorCode ierr;
2111   PetscInt       s1,s2,s3;
2112 
2113   PetscFunctionBegin;
2114   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2115   if (rr) {
2116     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2117     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2118     /* Overlap communication with computation. */
2119     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2120   }
2121   if (ll) {
2122     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2123     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2124     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2125   }
2126   /* scale  the diagonal block */
2127   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2128 
2129   if (rr) {
2130     /* Do a scatter end and then right scale the off-diagonal block */
2131     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2132     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2133   }
2134 
2135   PetscFunctionReturn(0);
2136 }
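
/*
   A minimal sketch, assuming an assembled MPIAIJ matrix A; the left vector must have the
   row layout and the right vector the column layout, as checked above:

       Vec l,r;
       ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);       (r: column layout, l: row layout)
       ierr = VecSet(l,2.0);CHKERRQ(ierr);
       ierr = VecSet(r,0.5);CHKERRQ(ierr);
       ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);   (A <- diag(l) A diag(r))
       ierr = VecDestroy(&l);CHKERRQ(ierr);
       ierr = VecDestroy(&r);CHKERRQ(ierr);
*/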
2137 
2138 #undef __FUNCT__
2139 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2140 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2141 {
2142   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2143   PetscErrorCode ierr;
2144 
2145   PetscFunctionBegin;
2146   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 #undef __FUNCT__
2151 #define __FUNCT__ "MatEqual_MPIAIJ"
2152 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2153 {
2154   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2155   Mat            a,b,c,d;
2156   PetscBool      flg;
2157   PetscErrorCode ierr;
2158 
2159   PetscFunctionBegin;
2160   a = matA->A; b = matA->B;
2161   c = matB->A; d = matB->B;
2162 
2163   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2164   if (flg) {
2165     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2166   }
2167   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2168   PetscFunctionReturn(0);
2169 }
2170 
2171 #undef __FUNCT__
2172 #define __FUNCT__ "MatCopy_MPIAIJ"
2173 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2174 {
2175   PetscErrorCode ierr;
2176   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2177   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2178 
2179   PetscFunctionBegin;
2180   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2181   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2182     /* because of the column compression in the off-processor part of the matrix a->B,
2183        the number of columns in a->B and b->B may be different, hence we cannot call
2184        the MatCopy() directly on the two parts. If need be, we can provide a more
2185        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2186        then copying the submatrices */
2187     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2188   } else {
2189     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2190     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2191   }
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatSetUp_MPIAIJ"
2197 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2198 {
2199   PetscErrorCode ierr;
2200 
2201   PetscFunctionBegin;
2202   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2203   PetscFunctionReturn(0);
2204 }
2205 
2206 #undef __FUNCT__
2207 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2208 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2209 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2210 {
2211   PetscInt          i,m=Y->rmap->N;
2212   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2213   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2214   const PetscInt    *xi = x->i,*yi = y->i;
2215 
2216   PetscFunctionBegin;
2217   /* Set the number of nonzeros in the new matrix */
2218   for(i=0; i<m; i++) {
2219     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2220     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2221     nnz[i] = 0;
2222     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2223       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2224       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2225       nnz[i]++;
2226     }
2227     for (; k<nzy; k++) nnz[i]++;
2228   }
2229   PetscFunctionReturn(0);
2230 }
2231 
2232 #undef __FUNCT__
2233 #define __FUNCT__ "MatAXPY_MPIAIJ"
2234 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2235 {
2236   PetscErrorCode ierr;
2237   PetscInt       i;
2238   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2239   PetscBLASInt   bnz,one=1;
2240   Mat_SeqAIJ     *x,*y;
2241 
2242   PetscFunctionBegin;
2243   if (str == SAME_NONZERO_PATTERN) {
2244     PetscScalar alpha = a;
2245     x = (Mat_SeqAIJ *)xx->A->data;
2246     y = (Mat_SeqAIJ *)yy->A->data;
2247     bnz = PetscBLASIntCast(x->nz);
2248     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2249     x = (Mat_SeqAIJ *)xx->B->data;
2250     y = (Mat_SeqAIJ *)yy->B->data;
2251     bnz = PetscBLASIntCast(x->nz);
2252     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2253   } else if (str == SUBSET_NONZERO_PATTERN) {
2254     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2255 
2256     x = (Mat_SeqAIJ *)xx->B->data;
2257     y = (Mat_SeqAIJ *)yy->B->data;
2258     if (y->xtoy && y->XtoY != xx->B) {
2259       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2260       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2261     }
2262     if (!y->xtoy) { /* get xtoy */
2263       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2264       y->XtoY = xx->B;
2265       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2266     }
2267     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2268   } else {
2269     Mat B;
2270     PetscInt *nnz_d,*nnz_o;
2271     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2272     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2273     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2274     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2275     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2276     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2277     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2278     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2279     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2280     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2281     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2282     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2283     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2284     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2285   }
2286   PetscFunctionReturn(0);
2287 }
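
/*
   A minimal sketch, assuming assembled MPIAIJ matrices X and Y with the same layout; the
   MatStructure argument selects the path above (SAME_NONZERO_PATTERN uses a direct BLAS
   axpy, SUBSET_NONZERO_PATTERN a cached index map, and DIFFERENT_NONZERO_PATTERN rebuilds
   the preallocation):

       ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);   (Y <- Y + 2 X)
*/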
2288 
2289 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatConjugate_MPIAIJ"
2293 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2294 {
2295 #if defined(PETSC_USE_COMPLEX)
2296   PetscErrorCode ierr;
2297   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2298 
2299   PetscFunctionBegin;
2300   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2301   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2302 #else
2303   PetscFunctionBegin;
2304 #endif
2305   PetscFunctionReturn(0);
2306 }
2307 
2308 #undef __FUNCT__
2309 #define __FUNCT__ "MatRealPart_MPIAIJ"
2310 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2311 {
2312   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2313   PetscErrorCode ierr;
2314 
2315   PetscFunctionBegin;
2316   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2317   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 #undef __FUNCT__
2322 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2323 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2324 {
2325   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2326   PetscErrorCode ierr;
2327 
2328   PetscFunctionBegin;
2329   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2330   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2331   PetscFunctionReturn(0);
2332 }
2333 
2334 #if defined(PETSC_HAVE_PBGL)
2335 
2336 #include <boost/parallel/mpi/bsp_process_group.hpp>
2337 #include <boost/graph/distributed/ilu_default_graph.hpp>
2338 #include <boost/graph/distributed/ilu_0_block.hpp>
2339 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2340 #include <boost/graph/distributed/petsc/interface.hpp>
2341 #include <boost/multi_array.hpp>
2342 #include <boost/parallel/distributed_property_map.hpp>
2343 
2344 #undef __FUNCT__
2345 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2346 /*
2347   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2348 */
2349 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2350 {
2351   namespace petsc = boost::distributed::petsc;
2352 
2353   namespace graph_dist = boost::graph::distributed;
2354   using boost::graph::distributed::ilu_default::process_group_type;
2355   using boost::graph::ilu_permuted;
2356 
2357   PetscBool       row_identity, col_identity;
2358   PetscContainer  c;
2359   PetscInt        m, n, M, N;
2360   PetscErrorCode  ierr;
2361 
2362   PetscFunctionBegin;
2363   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2364   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2365   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2366   if (!row_identity || !col_identity) {
2367     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2368   }
2369 
2370   process_group_type pg;
2371   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2372   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2373   lgraph_type&   level_graph = *lgraph_p;
2374   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2375 
2376   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2377   ilu_permuted(level_graph);
2378 
2379   /* put together the new matrix */
2380   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2381   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2382   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2383   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2384   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2385   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2386   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2387   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2388 
2389   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2390   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2391   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2392   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2398 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2399 {
2400   PetscFunctionBegin;
2401   PetscFunctionReturn(0);
2402 }
2403 
2404 #undef __FUNCT__
2405 #define __FUNCT__ "MatSolve_MPIAIJ"
2406 /*
2407   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2408 */
2409 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2410 {
2411   namespace graph_dist = boost::graph::distributed;
2412 
2413   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2414   lgraph_type*   lgraph_p;
2415   PetscContainer c;
2416   PetscErrorCode ierr;
2417 
2418   PetscFunctionBegin;
2419   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2420   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2421   ierr = VecCopy(b, x);CHKERRQ(ierr);
2422 
2423   PetscScalar* array_x;
2424   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2425   PetscInt sx;
2426   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2427 
2428   PetscScalar* array_b;
2429   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2430   PetscInt sb;
2431   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2432 
2433   lgraph_type&   level_graph = *lgraph_p;
2434   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2435 
2436   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2437   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2438                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2439 
2440   typedef boost::iterator_property_map<array_ref_type::iterator,
2441                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2442   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2443                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2444 
2445   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2446 
2447   PetscFunctionReturn(0);
2448 }
2449 #endif
2450 
2451 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2452   PetscInt       nzlocal,nsends,nrecvs;
2453   PetscMPIInt    *send_rank,*recv_rank;
2454   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2455   PetscScalar    *sbuf_a,**rbuf_a;
2456   PetscErrorCode (*Destroy)(Mat);
2457 } Mat_Redundant;
2458 
2459 #undef __FUNCT__
2460 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2461 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2462 {
2463   PetscErrorCode       ierr;
2464   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2465   PetscInt             i;
2466 
2467   PetscFunctionBegin;
2468   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2469   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2470   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2471   for (i=0; i<redund->nrecvs; i++){
2472     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2473     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2474   }
2475   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2476   ierr = PetscFree(redund);CHKERRQ(ierr);
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 #undef __FUNCT__
2481 #define __FUNCT__ "MatDestroy_MatRedundant"
2482 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2483 {
2484   PetscErrorCode  ierr;
2485   PetscContainer  container;
2486   Mat_Redundant   *redund=PETSC_NULL;
2487 
2488   PetscFunctionBegin;
2489   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2490   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2491   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2492   A->ops->destroy = redund->Destroy;
2493   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2494   if (A->ops->destroy) {
2495     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2496   }
2497   PetscFunctionReturn(0);
2498 }
2499 
2500 #undef __FUNCT__
2501 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2502 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2503 {
2504   PetscMPIInt    rank,size;
2505   MPI_Comm       comm=((PetscObject)mat)->comm;
2506   PetscErrorCode ierr;
2507   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2508   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2509   PetscInt       *rowrange=mat->rmap->range;
2510   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2511   Mat            A=aij->A,B=aij->B,C=*matredundant;
2512   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2513   PetscScalar    *sbuf_a;
2514   PetscInt       nzlocal=a->nz+b->nz;
2515   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2516   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2517   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2518   MatScalar      *aworkA,*aworkB;
2519   PetscScalar    *vals;
2520   PetscMPIInt    tag1,tag2,tag3,imdex;
2521   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2522                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2523   MPI_Status     recv_status,*send_status;
2524   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2525   PetscInt       **rbuf_j=PETSC_NULL;
2526   PetscScalar    **rbuf_a=PETSC_NULL;
2527   Mat_Redundant  *redund=PETSC_NULL;
2528   PetscContainer container;
2529 
2530   PetscFunctionBegin;
2531   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2532   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2533 
2534   if (reuse == MAT_REUSE_MATRIX) {
2535     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2536     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2537     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2538     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2539     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2540     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2541     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2542     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2543 
2544     nsends    = redund->nsends;
2545     nrecvs    = redund->nrecvs;
2546     send_rank = redund->send_rank;
2547     recv_rank = redund->recv_rank;
2548     sbuf_nz   = redund->sbuf_nz;
2549     rbuf_nz   = redund->rbuf_nz;
2550     sbuf_j    = redund->sbuf_j;
2551     sbuf_a    = redund->sbuf_a;
2552     rbuf_j    = redund->rbuf_j;
2553     rbuf_a    = redund->rbuf_a;
2554   }
2555 
2556   if (reuse == MAT_INITIAL_MATRIX){
2557     PetscMPIInt  subrank,subsize;
2558     PetscInt     nleftover,np_subcomm;
2559     /* get the destination processors' id send_rank, nsends and nrecvs */
2560     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2561     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2562     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2563     np_subcomm = size/nsubcomm;
2564     nleftover  = size - nsubcomm*np_subcomm;
2565     nsends = 0; nrecvs = 0;
2566     for (i=0; i<size; i++){ /* i=rank*/
2567       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2568         send_rank[nsends] = i; nsends++;
2569         recv_rank[nrecvs++] = i;
2570       }
2571     }
2572     if (rank >= size - nleftover){/* this proc is a leftover processor */
2573       i = size-nleftover-1;
2574       j = 0;
2575       while (j < nsubcomm - nleftover){
2576         send_rank[nsends++] = i;
2577         i--; j++;
2578       }
2579     }
2580 
2581     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2582       for (i=0; i<nleftover; i++){
2583         recv_rank[nrecvs++] = size-nleftover+i;
2584       }
2585     }
2586 
2587     /* allocate sbuf_j, sbuf_a */
2588     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2589     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2590     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2591   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2592 
2593   /* copy mat's local entries into the buffers */
2594   if (reuse == MAT_INITIAL_MATRIX){
2595     rownz_max = 0;
2596     rptr = sbuf_j;
2597     cols = sbuf_j + rend-rstart + 1;
2598     vals = sbuf_a;
2599     rptr[0] = 0;
2600     for (i=0; i<rend-rstart; i++){
2601       row = i + rstart;
2602       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2603       ncols  = nzA + nzB;
2604       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2605       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2606       /* load the column indices for this row into cols */
2607       lwrite = 0;
2608       for (l=0; l<nzB; l++) {
2609         if ((ctmp = bmap[cworkB[l]]) < cstart){
2610           vals[lwrite]   = aworkB[l];
2611           cols[lwrite++] = ctmp;
2612         }
2613       }
2614       for (l=0; l<nzA; l++){
2615         vals[lwrite]   = aworkA[l];
2616         cols[lwrite++] = cstart + cworkA[l];
2617       }
2618       for (l=0; l<nzB; l++) {
2619         if ((ctmp = bmap[cworkB[l]]) >= cend){
2620           vals[lwrite]   = aworkB[l];
2621           cols[lwrite++] = ctmp;
2622         }
2623       }
2624       vals += ncols;
2625       cols += ncols;
2626       rptr[i+1] = rptr[i] + ncols;
2627       if (rownz_max < ncols) rownz_max = ncols;
2628     }
2629     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2630   } else { /* only copy matrix values into sbuf_a */
2631     rptr = sbuf_j;
2632     vals = sbuf_a;
2633     rptr[0] = 0;
2634     for (i=0; i<rend-rstart; i++){
2635       row = i + rstart;
2636       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2637       ncols  = nzA + nzB;
2638       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2639       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2640       lwrite = 0;
2641       for (l=0; l<nzB; l++) {
2642         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2643       }
2644       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2645       for (l=0; l<nzB; l++) {
2646         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2647       }
2648       vals += ncols;
2649       rptr[i+1] = rptr[i] + ncols;
2650     }
2651   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2652 
2653   /* send nzlocal to others, and recv others' nzlocal */
2654   /*--------------------------------------------------*/
2655   if (reuse == MAT_INITIAL_MATRIX){
2656     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2657     s_waits2 = s_waits3 + nsends;
2658     s_waits1 = s_waits2 + nsends;
2659     r_waits1 = s_waits1 + nsends;
2660     r_waits2 = r_waits1 + nrecvs;
2661     r_waits3 = r_waits2 + nrecvs;
2662   } else {
2663     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2664     r_waits3 = s_waits3 + nsends;
2665   }
2666 
2667   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2668   if (reuse == MAT_INITIAL_MATRIX){
2669     /* get new tags to keep the communication clean */
2670     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2671     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2672     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2673 
2674     /* post receives of other's nzlocal */
2675     for (i=0; i<nrecvs; i++){
2676       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2677     }
2678     /* send nzlocal to others */
2679     for (i=0; i<nsends; i++){
2680       sbuf_nz[i] = nzlocal;
2681       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2682     }
2683     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2684     count = nrecvs;
2685     while (count) {
2686       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2687       recv_rank[imdex] = recv_status.MPI_SOURCE;
2688       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2689       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2690 
2691       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2692       rbuf_nz[imdex] += i + 2;
2693       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2694       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2695       count--;
2696     }
2697     /* wait on sends of nzlocal */
2698     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2699     /* send mat->i,j to others, and recv from others */
2700     /*------------------------------------------------*/
2701     for (i=0; i<nsends; i++){
2702       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2703       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2704     }
2705     /* wait on receives of mat->i,j */
2706     /*------------------------------*/
2707     count = nrecvs;
2708     while (count) {
2709       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2710       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2711       count--;
2712     }
2713     /* wait on sends of mat->i,j */
2714     /*---------------------------*/
2715     if (nsends) {
2716       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2717     }
2718   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2719 
2720   /* post receives, send and receive mat->a */
2721   /*----------------------------------------*/
2722   for (imdex=0; imdex<nrecvs; imdex++) {
2723     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2724   }
2725   for (i=0; i<nsends; i++){
2726     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2727   }
2728   count = nrecvs;
2729   while (count) {
2730     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2731     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2732     count--;
2733   }
2734   if (nsends) {
2735     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2736   }
2737 
2738   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2739 
2740   /* create redundant matrix */
2741   /*-------------------------*/
2742   if (reuse == MAT_INITIAL_MATRIX){
2743     /* compute rownz_max for preallocation */
2744     for (imdex=0; imdex<nrecvs; imdex++){
2745       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2746       rptr = rbuf_j[imdex];
2747       for (i=0; i<j; i++){
2748         ncols = rptr[i+1] - rptr[i];
2749         if (rownz_max < ncols) rownz_max = ncols;
2750       }
2751     }
2752 
2753     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2754     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2755     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2756     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2757     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2758     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2759   } else {
2760     C = *matredundant;
2761   }
2762 
2763   /* insert local matrix entries */
2764   rptr = sbuf_j;
2765   cols = sbuf_j + rend-rstart + 1;
2766   vals = sbuf_a;
2767   for (i=0; i<rend-rstart; i++){
2768     row   = i + rstart;
2769     ncols = rptr[i+1] - rptr[i];
2770     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2771     vals += ncols;
2772     cols += ncols;
2773   }
2774   /* insert received matrix entries */
2775   for (imdex=0; imdex<nrecvs; imdex++){
2776     rstart = rowrange[recv_rank[imdex]];
2777     rend   = rowrange[recv_rank[imdex]+1];
2778     rptr = rbuf_j[imdex];
2779     cols = rbuf_j[imdex] + rend-rstart + 1;
2780     vals = rbuf_a[imdex];
2781     for (i=0; i<rend-rstart; i++){
2782       row   = i + rstart;
2783       ncols = rptr[i+1] - rptr[i];
2784       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2785       vals += ncols;
2786       cols += ncols;
2787     }
2788   }
2789   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2790   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2791   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2792   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2793   if (reuse == MAT_INITIAL_MATRIX) {
2794     PetscContainer container;
2795     *matredundant = C;
2796     /* create a supporting struct and attach it to C for reuse */
2797     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2798     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2799     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2800     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2801     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2802     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2803 
2804     redund->nzlocal = nzlocal;
2805     redund->nsends  = nsends;
2806     redund->nrecvs  = nrecvs;
2807     redund->send_rank = send_rank;
2808     redund->recv_rank = recv_rank;
2809     redund->sbuf_nz = sbuf_nz;
2810     redund->rbuf_nz = rbuf_nz;
2811     redund->sbuf_j  = sbuf_j;
2812     redund->sbuf_a  = sbuf_a;
2813     redund->rbuf_j  = rbuf_j;
2814     redund->rbuf_a  = rbuf_a;
2815 
2816     redund->Destroy = C->ops->destroy;
2817     C->ops->destroy = MatDestroy_MatRedundant;
2818   }
2819   PetscFunctionReturn(0);
2820 }
2821 
2822 #undef __FUNCT__
2823 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2824 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2825 {
2826   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2827   PetscErrorCode ierr;
2828   PetscInt       i,*idxb = 0;
2829   PetscScalar    *va,*vb;
2830   Vec            vtmp;
2831 
2832   PetscFunctionBegin;
2833   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2834   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2835   if (idx) {
2836     for (i=0; i<A->rmap->n; i++) {
2837       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2838     }
2839   }
2840 
2841   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2842   if (idx) {
2843     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2844   }
2845   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2846   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2847 
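  /* merge: for each local row keep whichever of the diagonal-block and off-diagonal-block entries has
     the larger magnitude; off-diagonal column indices are mapped back to global numbering via garray */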
2848   for (i=0; i<A->rmap->n; i++){
2849     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2850       va[i] = vb[i];
2851       if (idx) idx[i] = a->garray[idxb[i]];
2852     }
2853   }
2854 
2855   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2856   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2857   ierr = PetscFree(idxb);CHKERRQ(ierr);
2858   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2859   PetscFunctionReturn(0);
2860 }
2861 
2862 #undef __FUNCT__
2863 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2864 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2865 {
2866   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2867   PetscErrorCode ierr;
2868   PetscInt       i,*idxb = 0;
2869   PetscScalar    *va,*vb;
2870   Vec            vtmp;
2871 
2872   PetscFunctionBegin;
2873   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2874   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2875   if (idx) {
2876     for (i=0; i<A->rmap->n; i++) {
2877       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2878     }
2879   }
2880 
2881   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2882   if (idx) {
2883     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2884   }
2885   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2886   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2887 
2888   for (i=0; i<A->rmap->n; i++){
2889     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2890       va[i] = vb[i];
2891       if (idx) idx[i] = a->garray[idxb[i]];
2892     }
2893   }
2894 
2895   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2896   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2897   ierr = PetscFree(idxb);CHKERRQ(ierr);
2898   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2899   PetscFunctionReturn(0);
2900 }
2901 
2902 #undef __FUNCT__
2903 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2904 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2905 {
2906   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2907   PetscInt       n      = A->rmap->n;
2908   PetscInt       cstart = A->cmap->rstart;
2909   PetscInt      *cmap   = mat->garray;
2910   PetscInt      *diagIdx, *offdiagIdx;
2911   Vec            diagV, offdiagV;
2912   PetscScalar   *a, *diagA, *offdiagA;
2913   PetscInt       r;
2914   PetscErrorCode ierr;
2915 
2916   PetscFunctionBegin;
2917   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2918   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2919   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2920   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2921   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2922   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2923   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2924   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2925   for(r = 0; r < n; ++r) {
2926     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2927       a[r]   = diagA[r];
2928       idx[r] = cstart + diagIdx[r];
2929     } else {
2930       a[r]   = offdiagA[r];
2931       idx[r] = cmap[offdiagIdx[r]];
2932     }
2933   }
2934   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2935   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2936   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2937   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2938   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2939   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2940   PetscFunctionReturn(0);
2941 }
2942 
2943 #undef __FUNCT__
2944 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2945 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2946 {
2947   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2948   PetscInt       n      = A->rmap->n;
2949   PetscInt       cstart = A->cmap->rstart;
2950   PetscInt      *cmap   = mat->garray;
2951   PetscInt      *diagIdx, *offdiagIdx;
2952   Vec            diagV, offdiagV;
2953   PetscScalar   *a, *diagA, *offdiagA;
2954   PetscInt       r;
2955   PetscErrorCode ierr;
2956 
2957   PetscFunctionBegin;
2958   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2959   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2960   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2961   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2962   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2963   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2964   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2965   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2966   for(r = 0; r < n; ++r) {
2967     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2968       a[r]   = diagA[r];
2969       idx[r] = cstart + diagIdx[r];
2970     } else {
2971       a[r]   = offdiagA[r];
2972       idx[r] = cmap[offdiagIdx[r]];
2973     }
2974   }
2975   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2976   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2977   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2978   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2979   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2980   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2981   PetscFunctionReturn(0);
2982 }
2983 
2984 #undef __FUNCT__
2985 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2986 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2987 {
2988   PetscErrorCode ierr;
2989   Mat            *dummy;
2990 
2991   PetscFunctionBegin;
2992   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2993   *newmat = *dummy;
2994   ierr = PetscFree(dummy);CHKERRQ(ierr);
2995   PetscFunctionReturn(0);
2996 }
2997 
2998 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2999 
3000 #undef __FUNCT__
3001 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3002 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3003 {
3004   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3005   PetscErrorCode ierr;
3006 
3007   PetscFunctionBegin;
3008   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3009   PetscFunctionReturn(0);
3010 }
3011 
3012 
3013 /* -------------------------------------------------------------------*/
3014 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3015        MatGetRow_MPIAIJ,
3016        MatRestoreRow_MPIAIJ,
3017        MatMult_MPIAIJ,
3018 /* 4*/ MatMultAdd_MPIAIJ,
3019        MatMultTranspose_MPIAIJ,
3020        MatMultTransposeAdd_MPIAIJ,
3021 #ifdef PETSC_HAVE_PBGL
3022        MatSolve_MPIAIJ,
3023 #else
3024        0,
3025 #endif
3026        0,
3027        0,
3028 /*10*/ 0,
3029        0,
3030        0,
3031        MatSOR_MPIAIJ,
3032        MatTranspose_MPIAIJ,
3033 /*15*/ MatGetInfo_MPIAIJ,
3034        MatEqual_MPIAIJ,
3035        MatGetDiagonal_MPIAIJ,
3036        MatDiagonalScale_MPIAIJ,
3037        MatNorm_MPIAIJ,
3038 /*20*/ MatAssemblyBegin_MPIAIJ,
3039        MatAssemblyEnd_MPIAIJ,
3040        MatSetOption_MPIAIJ,
3041        MatZeroEntries_MPIAIJ,
3042 /*24*/ MatZeroRows_MPIAIJ,
3043        0,
3044 #ifdef PETSC_HAVE_PBGL
3045        0,
3046 #else
3047        0,
3048 #endif
3049        0,
3050        0,
3051 /*29*/ MatSetUp_MPIAIJ,
3052 #ifdef PETSC_HAVE_PBGL
3053        0,
3054 #else
3055        0,
3056 #endif
3057        0,
3058        0,
3059        0,
3060 /*34*/ MatDuplicate_MPIAIJ,
3061        0,
3062        0,
3063        0,
3064        0,
3065 /*39*/ MatAXPY_MPIAIJ,
3066        MatGetSubMatrices_MPIAIJ,
3067        MatIncreaseOverlap_MPIAIJ,
3068        MatGetValues_MPIAIJ,
3069        MatCopy_MPIAIJ,
3070 /*44*/ MatGetRowMax_MPIAIJ,
3071        MatScale_MPIAIJ,
3072        0,
3073        0,
3074        MatZeroRowsColumns_MPIAIJ,
3075 /*49*/ 0,
3076        0,
3077        0,
3078        0,
3079        0,
3080 /*54*/ MatFDColoringCreate_MPIAIJ,
3081        0,
3082        MatSetUnfactored_MPIAIJ,
3083        0, /* MatPermute_MPIAIJ, impl currently broken */
3084        0,
3085 /*59*/ MatGetSubMatrix_MPIAIJ,
3086        MatDestroy_MPIAIJ,
3087        MatView_MPIAIJ,
3088        0,
3089        0,
3090 /*64*/ 0,
3091        0,
3092        0,
3093        0,
3094        0,
3095 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3096        MatGetRowMinAbs_MPIAIJ,
3097        0,
3098        MatSetColoring_MPIAIJ,
3099 #if defined(PETSC_HAVE_ADIC)
3100        MatSetValuesAdic_MPIAIJ,
3101 #else
3102        0,
3103 #endif
3104        MatSetValuesAdifor_MPIAIJ,
3105 /*75*/ MatFDColoringApply_AIJ,
3106        0,
3107        0,
3108        0,
3109        0,
3110 /*80*/ 0,
3111        0,
3112        0,
3113 /*83*/ MatLoad_MPIAIJ,
3114        0,
3115        0,
3116        0,
3117        0,
3118        0,
3119 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3120        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3121        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3122        MatPtAP_Basic,
3123        MatPtAPSymbolic_MPIAIJ,
3124 /*94*/ MatPtAPNumeric_MPIAIJ,
3125        0,
3126        0,
3127        0,
3128        0,
3129 /*99*/ 0,
3130        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3131        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3132        MatConjugate_MPIAIJ,
3133        0,
3134 /*104*/MatSetValuesRow_MPIAIJ,
3135        MatRealPart_MPIAIJ,
3136        MatImaginaryPart_MPIAIJ,
3137        0,
3138        0,
3139 /*109*/0,
3140        MatGetRedundantMatrix_MPIAIJ,
3141        MatGetRowMin_MPIAIJ,
3142        0,
3143        0,
3144 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3145        0,
3146        0,
3147        0,
3148        0,
3149 /*119*/0,
3150        0,
3151        0,
3152        0,
3153        MatGetMultiProcBlock_MPIAIJ,
3154 /*124*/MatFindNonZeroRows_MPIAIJ,
3155        MatGetColumnNorms_MPIAIJ,
3156        MatInvertBlockDiagonal_MPIAIJ,
3157        0,
3158        MatGetSubMatricesParallel_MPIAIJ,
3159 /*129*/0,
3160        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3161        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3162        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3163        0,
3164 /*134*/0,
3165        0,
3166        0,
3167        0,
3168        0
3169 };
3170 
3171 /* ----------------------------------------------------------------------------------------*/
3172 
3173 EXTERN_C_BEGIN
3174 #undef __FUNCT__
3175 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3176 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3177 {
3178   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3179   PetscErrorCode ierr;
3180 
3181   PetscFunctionBegin;
3182   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3183   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3184   PetscFunctionReturn(0);
3185 }
3186 EXTERN_C_END
3187 
3188 EXTERN_C_BEGIN
3189 #undef __FUNCT__
3190 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3191 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3192 {
3193   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3194   PetscErrorCode ierr;
3195 
3196   PetscFunctionBegin;
3197   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3198   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3199   PetscFunctionReturn(0);
3200 }
3201 EXTERN_C_END
3202 
3203 EXTERN_C_BEGIN
3204 #undef __FUNCT__
3205 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3206 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3207 {
3208   Mat_MPIAIJ     *b;
3209   PetscErrorCode ierr;
3210   PetscInt       i;
3211   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3212 
3213   PetscFunctionBegin;
3214   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3215   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3216   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3217   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3218   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3219   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3220 
3221   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3222   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3223   if (d_nnz) {
3224     for (i=0; i<B->rmap->n; i++) {
3225       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3226     }
3227   }
3228   if (o_nnz) {
3229     for (i=0; i<B->rmap->n; i++) {
3230       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3231     }
3232   }
3233   b = (Mat_MPIAIJ*)B->data;
3234 
3235   if (!B->preallocated) {
3236     /* Explicitly create 2 MATSEQAIJ matrices. */
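    /* b->A stores the diagonal block (local rows x local columns); b->B stores the off-diagonal block,
       created with N columns here and compacted to the referenced columns during assembly */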
3237     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3238     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3239     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3240     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3241     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3242     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3243     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3244     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3245     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3246     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3247   }
3248 
3249   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3250   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3251   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3252   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3253   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3254   B->preallocated = PETSC_TRUE;
3255   PetscFunctionReturn(0);
3256 }
3257 EXTERN_C_END
3258 
3259 #undef __FUNCT__
3260 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3261 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3262 {
3263   Mat            mat;
3264   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3265   PetscErrorCode ierr;
3266 
3267   PetscFunctionBegin;
3268   *newmat       = 0;
3269   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3270   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3271   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3272   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3273   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3274   a    = (Mat_MPIAIJ*)mat->data;
3275 
3276   mat->factortype    = matin->factortype;
3277   mat->rmap->bs      = matin->rmap->bs;
3278   mat->cmap->bs      = matin->cmap->bs;
3279   mat->assembled    = PETSC_TRUE;
3280   mat->insertmode   = NOT_SET_VALUES;
3281   mat->preallocated = PETSC_TRUE;
3282 
3283   a->size           = oldmat->size;
3284   a->rank           = oldmat->rank;
3285   a->donotstash     = oldmat->donotstash;
3286   a->roworiented    = oldmat->roworiented;
3287   a->rowindices     = 0;
3288   a->rowvalues      = 0;
3289   a->getrowactive   = PETSC_FALSE;
3290 
3291   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3292   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3293 
3294   if (oldmat->colmap) {
3295 #if defined (PETSC_USE_CTABLE)
3296     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3297 #else
3298     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3299     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3300     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3301 #endif
3302   } else a->colmap = 0;
3303   if (oldmat->garray) {
3304     PetscInt len;
3305     len  = oldmat->B->cmap->n;
3306     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3307     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3308     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3309   } else a->garray = 0;
3310 
3311   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3312   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3313   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3314   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3315   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3316   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3317   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3318   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3319   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3320   *newmat = mat;
3321   PetscFunctionReturn(0);
3322 }
3323 
3324 
3325 
3326 #undef __FUNCT__
3327 #define __FUNCT__ "MatLoad_MPIAIJ"
3328 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3329 {
3330   PetscScalar    *vals,*svals;
3331   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3332   PetscErrorCode ierr;
3333   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3334   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3335   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3336   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3337   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3338   int            fd;
3339   PetscInt       bs = 1;
3340 
3341   PetscFunctionBegin;
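  /* Overall flow: rank 0 reads the header, row lengths, column indices and values from the binary
     file and ships each slice to its owner with MPIULong_Send(); every process then derives
     diagonal/off-diagonal row lengths for preallocation and inserts its rows via MatSetValues_MPIAIJ() */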
3342   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3343   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3344   if (!rank) {
3345     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3346     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3347     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3348   }
3349 
3350   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3351   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3352   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3353 
3354   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3355 
3356   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3357   M = header[1]; N = header[2];
3358   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3359   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3360   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3361 
3362   /* If global sizes are set, check if they are consistent with that given in the file */
3363   if (sizesset) {
3364     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3365   }
3366   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3367   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3368 
3369   /* determine ownership of all (block) rows */
3370   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3371   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3372   else m = newMat->rmap->n; /* Set by user */
3373 
3374   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3375   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3376 
3377   /* First process needs enough room for process with most rows */
3378   if (!rank) {
3379     mmax       = rowners[1];
3380     for (i=2; i<=size; i++) {
3381       mmax = PetscMax(mmax,rowners[i]);
3382     }
3383   } else mmax = m;
3384 
3385   rowners[0] = 0;
3386   for (i=2; i<=size; i++) {
3387     rowners[i] += rowners[i-1];
3388   }
3389   rstart = rowners[rank];
3390   rend   = rowners[rank+1];
3391 
3392   /* distribute row lengths to all processors */
3393   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3394   if (!rank) {
3395     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3396     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3397     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3398     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3399     for (j=0; j<m; j++) {
3400       procsnz[0] += ourlens[j];
3401     }
3402     for (i=1; i<size; i++) {
3403       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3404       /* calculate the number of nonzeros on each processor */
3405       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3406         procsnz[i] += rowlengths[j];
3407       }
3408       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3409     }
3410     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3411   } else {
3412     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3413   }
3414 
3415   if (!rank) {
3416     /* determine max buffer needed and allocate it */
3417     maxnz = 0;
3418     for (i=0; i<size; i++) {
3419       maxnz = PetscMax(maxnz,procsnz[i]);
3420     }
3421     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3422 
3423     /* read in my part of the matrix column indices  */
3424     nz   = procsnz[0];
3425     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3426     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3427 
3428     /* read in everyone else's and ship off */
3429     for (i=1; i<size; i++) {
3430       nz     = procsnz[i];
3431       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3432       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3433     }
3434     ierr = PetscFree(cols);CHKERRQ(ierr);
3435   } else {
3436     /* determine buffer space needed for message */
3437     nz = 0;
3438     for (i=0; i<m; i++) {
3439       nz += ourlens[i];
3440     }
3441     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3442 
3443     /* receive message of column indices*/
3444     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3445   }
3446 
3447   /* determine column ownership if matrix is not square */
3448   if (N != M) {
3449     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3450     else n = newMat->cmap->n;
3451     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3452     cstart = cend - n;
3453   } else {
3454     cstart = rstart;
3455     cend   = rend;
3456     n      = cend - cstart;
3457   }
3458 
3459   /* loop over local rows, determining number of off diagonal entries */
3460   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3461   jj = 0;
3462   for (i=0; i<m; i++) {
3463     for (j=0; j<ourlens[i]; j++) {
3464       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3465       jj++;
3466     }
3467   }
3468 
3469   for (i=0; i<m; i++) {
3470     ourlens[i] -= offlens[i];
3471   }
3472   if (!sizesset) {
3473     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3474   }
3475 
3476   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3477 
3478   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3479 
3480   for (i=0; i<m; i++) {
3481     ourlens[i] += offlens[i];
3482   }
3483 
3484   if (!rank) {
3485     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3486 
3487     /* read in my part of the matrix numerical values  */
3488     nz   = procsnz[0];
3489     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3490 
3491     /* insert into matrix */
3492     jj      = rstart;
3493     smycols = mycols;
3494     svals   = vals;
3495     for (i=0; i<m; i++) {
3496       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3497       smycols += ourlens[i];
3498       svals   += ourlens[i];
3499       jj++;
3500     }
3501 
3502     /* read in other processors and ship out */
3503     for (i=1; i<size; i++) {
3504       nz     = procsnz[i];
3505       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3506       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3507     }
3508     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3509   } else {
3510     /* receive numeric values */
3511     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3512 
3513     /* receive message of values*/
3514     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3515 
3516     /* insert into matrix */
3517     jj      = rstart;
3518     smycols = mycols;
3519     svals   = vals;
3520     for (i=0; i<m; i++) {
3521       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3522       smycols += ourlens[i];
3523       svals   += ourlens[i];
3524       jj++;
3525     }
3526   }
3527   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3528   ierr = PetscFree(vals);CHKERRQ(ierr);
3529   ierr = PetscFree(mycols);CHKERRQ(ierr);
3530   ierr = PetscFree(rowners);CHKERRQ(ierr);
3531   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3532   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3533 
3534   PetscFunctionReturn(0);
3535 }
3536 
3537 #undef __FUNCT__
3538 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3539 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3540 {
3541   PetscErrorCode ierr;
3542   IS             iscol_local;
3543   PetscInt       csize;
3544 
3545   PetscFunctionBegin;
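  /* The sequential column IS needed by MatGetSubMatrix_MPIAIJ_Private() is obtained by gathering
     iscol onto every process; it is composed with the new matrix under the name "ISAllGather" so
     that a MAT_REUSE_MATRIX call can retrieve it instead of gathering again */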
3546   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3547   if (call == MAT_REUSE_MATRIX) {
3548     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3549     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3550   } else {
3551     PetscInt cbs;
3552     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3553     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3554     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3555   }
3556   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3557   if (call == MAT_INITIAL_MATRIX) {
3558     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3559     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3560   }
3561   PetscFunctionReturn(0);
3562 }
3563 
3564 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3565 #undef __FUNCT__
3566 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3567 /*
3568     Not great since it makes two copies of the submatrix: first a local SeqAIJ,
3569   and then the final result obtained by concatenating the local matrices.
3570   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3571 
3572   Note: This requires a sequential iscol with all indices.
3573 */
3574 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3575 {
3576   PetscErrorCode ierr;
3577   PetscMPIInt    rank,size;
3578   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3579   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3580   PetscBool      allcolumns, colflag;
3581   Mat            M,Mreuse;
3582   MatScalar      *vwork,*aa;
3583   MPI_Comm       comm = ((PetscObject)mat)->comm;
3584   Mat_SeqAIJ     *aij;
3585 
3586 
3587   PetscFunctionBegin;
3588   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3589   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3590 
3591   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3592   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3593   if (colflag && ncol == mat->cmap->N){
3594     allcolumns = PETSC_TRUE;
3595   } else {
3596     allcolumns = PETSC_FALSE;
3597   }
3598   if (call ==  MAT_REUSE_MATRIX) {
3599     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3600     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3601     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3602   } else {
3603     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3604   }
3605 
3606   /*
3607       m - number of local rows
3608       n - number of columns (same on all processors)
3609       rstart - first row in new global matrix generated
3610   */
3611   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3612   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3613   if (call == MAT_INITIAL_MATRIX) {
3614     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3615     ii  = aij->i;
3616     jj  = aij->j;
3617 
3618     /*
3619         Determine the number of non-zeros in the diagonal and off-diagonal
3620         portions of the matrix in order to do correct preallocation
3621     */
3622 
3623     /* first get start and end of "diagonal" columns */
3624     if (csize == PETSC_DECIDE) {
3625       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3626       if (mglobal == n) { /* square matrix */
3627         nlocal = m;
3628       } else {
3629         nlocal = n/size + ((n % size) > rank);
3630       }
3631     } else {
3632       nlocal = csize;
3633     }
3634     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3635     rstart = rend - nlocal;
3636     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3637 
3638     /* next, compute all the lengths */
3639     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3640     olens = dlens + m;
3641     for (i=0; i<m; i++) {
3642       jend = ii[i+1] - ii[i];
3643       olen = 0;
3644       dlen = 0;
3645       for (j=0; j<jend; j++) {
3646         if (*jj < rstart || *jj >= rend) olen++;
3647         else dlen++;
3648         jj++;
3649       }
3650       olens[i] = olen;
3651       dlens[i] = dlen;
3652     }
3653     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3654     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3655     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3656     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3657     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3658     ierr = PetscFree(dlens);CHKERRQ(ierr);
3659   } else {
3660     PetscInt ml,nl;
3661 
3662     M = *newmat;
3663     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3664     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3665     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3666     /*
3667          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3668        rather than the slower MatSetValues().
3669     */
3670     M->was_assembled = PETSC_TRUE;
3671     M->assembled     = PETSC_FALSE;
3672   }
3673   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3674   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3675   ii  = aij->i;
3676   jj  = aij->j;
3677   aa  = aij->a;
3678   for (i=0; i<m; i++) {
3679     row   = rstart + i;
3680     nz    = ii[i+1] - ii[i];
3681     cwork = jj;     jj += nz;
3682     vwork = aa;     aa += nz;
3683     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3684   }
3685 
3686   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3687   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3688   *newmat = M;
3689 
3690   /* save submatrix used in processor for next request */
3691   if (call ==  MAT_INITIAL_MATRIX) {
3692     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3693     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3694   }
3695 
3696   PetscFunctionReturn(0);
3697 }
3698 
3699 EXTERN_C_BEGIN
3700 #undef __FUNCT__
3701 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3702 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3703 {
3704   PetscInt       m,cstart, cend,j,nnz,i,d;
3705   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3706   const PetscInt *JJ;
3707   PetscScalar    *values;
3708   PetscErrorCode ierr;
3709 
3710   PetscFunctionBegin;
3711   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3712 
3713   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3714   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3715   m      = B->rmap->n;
3716   cstart = B->cmap->rstart;
3717   cend   = B->cmap->rend;
3718   rstart = B->rmap->rstart;
3719 
3720   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3721 
3722 #if defined(PETSC_USE_DEBUG)
3723   for (i=0; i<m; i++) {
3724     nnz     = Ii[i+1]- Ii[i];
3725     JJ      = J + Ii[i];
3726     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3727     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3728     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3729   }
3730 #endif
3731 
3732   for (i=0; i<m; i++) {
3733     nnz     = Ii[i+1]- Ii[i];
3734     JJ      = J + Ii[i];
3735     nnz_max = PetscMax(nnz_max,nnz);
3736     d       = 0;
3737     for (j=0; j<nnz; j++) {
3738       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3739     }
3740     d_nnz[i] = d;
3741     o_nnz[i] = nnz - d;
3742   }
3743   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3744   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3745 
3746   if (v) values = (PetscScalar*)v;
3747   else {
3748     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3749     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3750   }
3751 
3752   for (i=0; i<m; i++) {
3753     ii   = i + rstart;
3754     nnz  = Ii[i+1]- Ii[i];
3755     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3756   }
3757   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3758   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3759 
3760   if (!v) {
3761     ierr = PetscFree(values);CHKERRQ(ierr);
3762   }
3763   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3764   PetscFunctionReturn(0);
3765 }
3766 EXTERN_C_END
3767 
3768 #undef __FUNCT__
3769 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3770 /*@
3771    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3772    (the default parallel PETSc format).
3773 
3774    Collective on MPI_Comm
3775 
3776    Input Parameters:
3777 +  B - the matrix
3778 .  i - the indices into j for the start of each local row (starts with zero)
3779 .  j - the column indices for each local row (starts with zero)
3780 -  v - optional values in the matrix
3781 
3782    Level: developer
3783 
3784    Notes:
3785        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3786      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3787      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3788 
3789        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3790 
3791        The format used for the sparse matrix input is equivalent to a
3792     row-major ordering, i.e. for the following matrix, the input data expected is
3793     as shown:
3794 
3795         1 0 0
3796         2 0 3     P0
3797        -------
3798         4 5 6     P1
3799 
3800      Process0 [P0]: rows_owned=[0,1]
3801         i =  {0,1,3}  [size = nrow+1  = 2+1]
3802         j =  {0,0,2}  [size = nz = 3]
3803         v =  {1,2,3}  [size = nz = 3]
3804 
3805      Process1 [P1]: rows_owned=[2]
3806         i =  {0,3}    [size = nrow+1  = 1+1]
3807         j =  {0,1,2}  [size = nz = 3]
3808         v =  {4,5,6}  [size = nz = 3]
3809 
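     A minimal calling sketch (the names B, comm, mlocal, nlocal, ia, ja, and va are illustrative
     placeholders for the caller's communicator, local sizes, and per-process CSR arrays):

.vb
     Mat B;
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,ia,ja,va);CHKERRQ(ierr);
.ve
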
3810 .keywords: matrix, aij, compressed row, sparse, parallel
3811 
3812 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3813           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3814 @*/
3815 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3816 {
3817   PetscErrorCode ierr;
3818 
3819   PetscFunctionBegin;
3820   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3821   PetscFunctionReturn(0);
3822 }
3823 
3824 #undef __FUNCT__
3825 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3826 /*@C
3827    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3828    (the default parallel PETSc format).  For good matrix assembly performance
3829    the user should preallocate the matrix storage by setting the parameters
3830    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3831    performance can be increased by more than a factor of 50.
3832 
3833    Collective on MPI_Comm
3834 
3835    Input Parameters:
3836 +  A - the matrix
3837 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3838            (same value is used for all local rows)
3839 .  d_nnz - array containing the number of nonzeros in the various rows of the
3840            DIAGONAL portion of the local submatrix (possibly different for each row)
3841            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3842            The size of this array is equal to the number of local rows, i.e 'm'.
3843            For matrices that will be factored, you must leave room for (and set)
3844            the diagonal entry even if it is zero.
3845 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3846            submatrix (same value is used for all local rows).
3847 -  o_nnz - array containing the number of nonzeros in the various rows of the
3848            OFF-DIAGONAL portion of the local submatrix (possibly different for
3849            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3850            structure. The size of this array is equal to the number
3851            of local rows, i.e 'm'.
3852 
3853    If the *_nnz parameter is given then the *_nz parameter is ignored
3854 
3855    The AIJ format (also called the Yale sparse matrix format or
3856    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3857    storage.  The stored row and column indices begin with zero.
3858    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3859 
3860    The parallel matrix is partitioned such that the first m0 rows belong to
3861    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3862    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3863 
3864    The DIAGONAL portion of the local submatrix of a processor can be defined
3865    as the submatrix which is obtained by extracting the part corresponding to
3866    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3867    first row that belongs to the processor, r2 is the last row belonging to
3868    this processor, and c1-c2 is the range of indices of the local part of a
3869    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3870    common case of a square matrix, the row and column ranges are the same and
3871    the DIAGONAL part is also square. The remaining portion of the local
3872    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3873 
3874    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3875 
3876    You can call MatGetInfo() to get information on how effective the preallocation was;
3877    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3878    You can also run with the option -info and look for messages with the string
3879    malloc in them to see if additional memory allocation was needed.
3880 
3881    Example usage:
3882 
3883    Consider the following 8x8 matrix with 34 non-zero values, that is
3884    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3885    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3886    as follows:
3887 
3888 .vb
3889             1  2  0  |  0  3  0  |  0  4
3890     Proc0   0  5  6  |  7  0  0  |  8  0
3891             9  0 10  | 11  0  0  | 12  0
3892     -------------------------------------
3893            13  0 14  | 15 16 17  |  0  0
3894     Proc1   0 18  0  | 19 20 21  |  0  0
3895             0  0  0  | 22 23  0  | 24  0
3896     -------------------------------------
3897     Proc2  25 26 27  |  0  0 28  | 29  0
3898            30  0  0  | 31 32 33  |  0 34
3899 .ve
3900 
3901    This can be represented as a collection of submatrices as:
3902 
3903 .vb
3904       A B C
3905       D E F
3906       G H I
3907 .ve
3908 
3909    Where the submatrices A,B,C are owned by proc0, D,E,F are
3910    owned by proc1, G,H,I are owned by proc2.
3911 
3912    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3913    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3914    The 'M','N' parameters are 8,8, and have the same values on all procs.
3915 
3916    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3917    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3918    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3919    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3920    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3921    matrix, and [DF] as another SeqAIJ matrix.
3922 
3923    When d_nz, o_nz parameters are specified, d_nz storage elements are
3924    allocated for every row of the local diagonal submatrix, and o_nz
3925    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3926    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3927    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3928    In this case, the values of d_nz,o_nz are:
3929 .vb
3930      proc0 : d_nz = 2, o_nz = 2
3931      proc1 : d_nz = 3, o_nz = 2
3932      proc2 : d_nz = 1, o_nz = 4
3933 .ve
3934    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3935    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3936    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3937    34 values.
3938 
3939    When d_nnz, o_nnz parameters are specified, the storage is specified
3940    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3941    In the above case the values for d_nnz,o_nnz are:
3942 .vb
3943      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3944      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3945      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3946 .ve
3947    Here the space allocated is the sum of all the above values, i.e. 34, and
3948    hence pre-allocation is perfect.
3949 
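   As a concrete sketch, the call that proc0 of the example above would make with the d_nnz/o_nnz
   arrays (variable names are illustrative) is:
.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   The other processes make the matching collective call with their own arrays.
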
3950    Level: intermediate
3951 
3952 .keywords: matrix, aij, compressed row, sparse, parallel
3953 
3954 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3955           MPIAIJ, MatGetInfo()
3956 @*/
3957 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3958 {
3959   PetscErrorCode ierr;
3960 
3961   PetscFunctionBegin;
3962   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3963   PetscValidType(B,1);
3964   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3965   PetscFunctionReturn(0);
3966 }
3967 
3968 #undef __FUNCT__
3969 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3970 /*@
3971      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3972          CSR format the local rows.
3973 
3974    Collective on MPI_Comm
3975 
3976    Input Parameters:
3977 +  comm - MPI communicator
3978 .  m - number of local rows (Cannot be PETSC_DECIDE)
3979 .  n - This value should be the same as the local size used in creating the
3980        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3981        calculated if N is given) For square matrices n is almost always m.
3982 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3983 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3984 .   i - row indices
3985 .   j - column indices
3986 -   a - matrix values
3987 
3988    Output Parameter:
3989 .   mat - the matrix
3990 
3991    Level: intermediate
3992 
3993    Notes:
3994        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3995      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3996      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3997 
3998        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3999 
4000        The format used for the sparse matrix input is equivalent to a
4001     row-major ordering, i.e. for the following matrix, the input data expected is
4002     as shown:
4003 
4004         1 0 0
4005         2 0 3     P0
4006        -------
4007         4 5 6     P1
4008 
4009      Process0 [P0]: rows_owned=[0,1]
4010         i =  {0,1,3}  [size = nrow+1  = 2+1]
4011         j =  {0,0,2}  [size = nz = 3]
4012         v =  {1,2,3}  [size = nz = 3]
4013 
4014      Process1 [P1]: rows_owned=[2]
4015         i =  {0,3}    [size = nrow+1  = 1+1]
4016         j =  {0,1,2}  [size = nz = 3]
4017         v =  {4,5,6}  [size = nz = 3]
4018 
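   A sketch of the corresponding calls on two processes (illustrative variable
   names; the CSR arrays are exactly those listed above) is

.vb
     Mat         A;
     PetscMPIInt rank;
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};   /* P0 local CSR */
     PetscScalar v0[] = {1,2,3};
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};   /* P1 local CSR */
     PetscScalar v1[] = {4,5,6};

     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
     if (!rank) {
       ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i0,j0,v0,&A);CHKERRQ(ierr);
     } else {
       ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i1,j1,v1,&A);CHKERRQ(ierr);
     }
.ve
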
4019 .keywords: matrix, aij, compressed row, sparse, parallel
4020 
4021 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4022           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4023 @*/
4024 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4025 {
4026   PetscErrorCode ierr;
4027 
4028   PetscFunctionBegin;
4029   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4030   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4031   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4032   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4033   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4034   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4035   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4036   PetscFunctionReturn(0);
4037 }
4038 
4039 #undef __FUNCT__
4040 #define __FUNCT__ "MatCreateAIJ"
4041 /*@C
4042    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4043    (the default parallel PETSc format).  For good matrix assembly performance
4044    the user should preallocate the matrix storage by setting the parameters
4045    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4046    performance can be increased by more than a factor of 50.
4047 
4048    Collective on MPI_Comm
4049 
4050    Input Parameters:
4051 +  comm - MPI communicator
4052 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4053            This value should be the same as the local size used in creating the
4054            y vector for the matrix-vector product y = Ax.
4055 .  n - This value should be the same as the local size used in creating the
4056        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4057        calculated if N is given). For square matrices n is almost always m.
4058 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4059 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4060 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4061            (same value is used for all local rows)
4062 .  d_nnz - array containing the number of nonzeros in the various rows of the
4063            DIAGONAL portion of the local submatrix (possibly different for each row)
4064            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4065            The size of this array is equal to the number of local rows, i.e. 'm'.
4066 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4067            submatrix (same value is used for all local rows).
4068 -  o_nnz - array containing the number of nonzeros in the various rows of the
4069            OFF-DIAGONAL portion of the local submatrix (possibly different for
4070            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4071            structure. The size of this array is equal to the number
4072            of local rows, i.e. 'm'.
4073 
4074    Output Parameter:
4075 .  A - the matrix
4076 
4077    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4078    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4079    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4080 
4081    Notes:
4082    If the *_nnz parameter is given then the *_nz parameter is ignored
4083 
4084    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4085    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4086    storage requirements for this matrix.
4087 
4088    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4089    processor then it must be used on all processors that share the object for
4090    that argument.
4091 
4092    The user MUST specify either the local or global matrix dimensions
4093    (possibly both).
4094 
4095    The parallel matrix is partitioned across processors such that the
4096    first m0 rows belong to process 0, the next m1 rows belong to
4097    process 1, the next m2 rows belong to process 2, etc., where
4098    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4099    values corresponding to an [m x N] submatrix.
4100 
4101    The columns are logically partitioned with the n0 columns belonging
4102    to the 0th partition, the next n1 columns belonging to the next
4103    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4104 
4105    The DIAGONAL portion of the local submatrix on any given processor
4106    is the submatrix corresponding to the rows and columns m,n
4107    owned by the given processor, i.e. the diagonal submatrix on
4108    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4109    etc. The remaining portion of the local submatrix [m x (N-n)]
4110    constitutes the OFF-DIAGONAL portion. The example below better
4111    illustrates this concept.
4112 
4113    For a square global matrix we define each processor's diagonal portion
4114    to be its local rows and the corresponding columns (a square submatrix);
4115    each processor's off-diagonal portion encompasses the remainder of the
4116    local matrix (a rectangular submatrix).
4117 
4118    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4119 
4120    When calling this routine with a single process communicator, a matrix of
4121    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4122    type of communicator, use the construction mechanism:
4123      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4124 
4125    By default, this format uses inodes (identical nodes) when possible.
4126    We search for consecutive rows with the same nonzero structure, thereby
4127    reusing matrix information to achieve increased efficiency.
4128 
4129    Options Database Keys:
4130 +  -mat_no_inode  - Do not use inodes
4131 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4132 -  -mat_aij_oneindex - Internally use indexing starting at 1
4133         rather than 0.  Note that when calling MatSetValues(),
4134         the user still MUST index entries starting at 0!
4135 
4136 
4137    Example usage:
4138 
4139    Consider the following 8x8 matrix with 34 non-zero values that is
4140    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4141    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4142    as follows:
4143 
4144 .vb
4145             1  2  0  |  0  3  0  |  0  4
4146     Proc0   0  5  6  |  7  0  0  |  8  0
4147             9  0 10  | 11  0  0  | 12  0
4148     -------------------------------------
4149            13  0 14  | 15 16 17  |  0  0
4150     Proc1   0 18  0  | 19 20 21  |  0  0
4151             0  0  0  | 22 23  0  | 24  0
4152     -------------------------------------
4153     Proc2  25 26 27  |  0  0 28  | 29  0
4154            30  0  0  | 31 32 33  |  0 34
4155 .ve
4156 
4157    This can be represented as a collection of submatrices as:
4158 
4159 .vb
4160       A B C
4161       D E F
4162       G H I
4163 .ve
4164 
4165    Where the submatrices A,B,C are owned by proc0, D,E,F are
4166    owned by proc1, G,H,I are owned by proc2.
4167 
4168    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4169    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4170    The 'M','N' parameters are 8,8, and have the same values on all procs.
4171 
4172    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4173    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4174    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4175    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4176    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4177    matrix, and [DF] as another SeqAIJ matrix.
4178 
4179    When d_nz, o_nz parameters are specified, d_nz storage elements are
4180    allocated for every row of the local diagonal submatrix, and o_nz
4181    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4182    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4183    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4184    In this case, the values of d_nz,o_nz are:
4185 .vb
4186      proc0 : d_nz = 2, o_nz = 2
4187      proc1 : d_nz = 3, o_nz = 2
4188      proc2 : d_nz = 1, o_nz = 4
4189 .ve
4190    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4191    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4192    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4193    34 values.
4194 
4195    When the d_nnz, o_nnz parameters are specified, the storage is specified
4196    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4197    In the above case the values for d_nnz,o_nnz are:
4198 .vb
4199      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4200      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4201      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4202 .ve
4203    Here the space allocated is the sum of all the above values, i.e. 34, and
4204    hence the preallocation is perfect.
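
   As a sketch (illustrative names), proc0 in the example above could create its part of
   the matrix directly with

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};

     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   with the other processes making the same call using their own m, n, d_nnz, and o_nnz values.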
4205 
4206    Level: intermediate
4207 
4208 .keywords: matrix, aij, compressed row, sparse, parallel
4209 
4210 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4211           MPIAIJ, MatCreateMPIAIJWithArrays()
4212 @*/
4213 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4214 {
4215   PetscErrorCode ierr;
4216   PetscMPIInt    size;
4217 
4218   PetscFunctionBegin;
4219   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4220   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4221   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4222   if (size > 1) {
4223     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4224     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4225   } else {
4226     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4227     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4228   }
4229   PetscFunctionReturn(0);
4230 }
4231 
4232 #undef __FUNCT__
4233 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4234 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4235 {
4236   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4237 
4238   PetscFunctionBegin;
4239   *Ad     = a->A;
4240   *Ao     = a->B;
4241   *colmap = a->garray;
4242   PetscFunctionReturn(0);
4243 }
4244 
4245 #undef __FUNCT__
4246 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4247 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4248 {
4249   PetscErrorCode ierr;
4250   PetscInt       i;
4251   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4252 
4253   PetscFunctionBegin;
4254   if (coloring->ctype == IS_COLORING_GLOBAL) {
4255     ISColoringValue *allcolors,*colors;
4256     ISColoring      ocoloring;
4257 
4258     /* set coloring for diagonal portion */
4259     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4260 
4261     /* set coloring for off-diagonal portion */
4262     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4263     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4264     for (i=0; i<a->B->cmap->n; i++) {
4265       colors[i] = allcolors[a->garray[i]];
4266     }
4267     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4268     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4269     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4270     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4271   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4272     ISColoringValue *colors;
4273     PetscInt        *larray;
4274     ISColoring      ocoloring;
4275 
4276     /* set coloring for diagonal portion */
4277     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4278     for (i=0; i<a->A->cmap->n; i++) {
4279       larray[i] = i + A->cmap->rstart;
4280     }
4281     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4282     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4283     for (i=0; i<a->A->cmap->n; i++) {
4284       colors[i] = coloring->colors[larray[i]];
4285     }
4286     ierr = PetscFree(larray);CHKERRQ(ierr);
4287     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4288     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4289     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4290 
4291     /* set coloring for off-diagonal portion */
4292     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4293     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4294     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4295     for (i=0; i<a->B->cmap->n; i++) {
4296       colors[i] = coloring->colors[larray[i]];
4297     }
4298     ierr = PetscFree(larray);CHKERRQ(ierr);
4299     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4300     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4301     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4302   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4303 
4304   PetscFunctionReturn(0);
4305 }
4306 
4307 #if defined(PETSC_HAVE_ADIC)
4308 #undef __FUNCT__
4309 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4310 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4311 {
4312   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4313   PetscErrorCode ierr;
4314 
4315   PetscFunctionBegin;
4316   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4317   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4318   PetscFunctionReturn(0);
4319 }
4320 #endif
4321 
4322 #undef __FUNCT__
4323 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4324 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4325 {
4326   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4327   PetscErrorCode ierr;
4328 
4329   PetscFunctionBegin;
4330   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4331   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4332   PetscFunctionReturn(0);
4333 }
4334 
4335 #undef __FUNCT__
4336 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4337 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4338 {
4339   PetscErrorCode ierr;
4340   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4341   PetscInt       *indx;
4342 
4343   PetscFunctionBegin;
4344   /* This routine will ONLY return MPIAIJ type matrix */
4345   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4346   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4347   if (n == PETSC_DECIDE){
4348     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4349   }
4350   /* Check sum(n) = N */
4351   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4352   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4353 
4354   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4355   rstart -= m;
4356 
4357   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4358   for (i=0;i<m;i++) {
4359     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4360     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4361     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4362   }
4363 
4364   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4365   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4366   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4367   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4368   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4369   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4370   PetscFunctionReturn(0);
4371 }
4372 
4373 #undef __FUNCT__
4374 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4375 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4376 {
4377   PetscErrorCode ierr;
4378   PetscInt       m,N,i,rstart,nnz,Ii;
4379   PetscInt       *indx;
4380   PetscScalar    *values;
4381 
4382   PetscFunctionBegin;
4383   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4384   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4385   for (i=0;i<m;i++) {
4386     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4387     Ii    = i + rstart;
4388     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4389     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4390   }
4391   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4392   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4393   PetscFunctionReturn(0);
4394 }
4395 
4396 #undef __FUNCT__
4397 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4398 /*@
4399       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4400                  matrices from each processor
4401 
4402     Collective on MPI_Comm
4403 
4404    Input Parameters:
4405 +    comm - the communicator the parallel matrix will live on
4406 .    inmat - the input sequential matrices
4407 .    n - number of local columns (or PETSC_DECIDE)
4408 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4409 
4410    Output Parameter:
4411 .    outmat - the parallel matrix generated
4412 
4413     Level: advanced
4414 
4415    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4416 
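   A sketch of typical use (assuming each process already holds a SeqAIJ matrix Aloc
   with the same number of columns; names are illustrative) is

.vb
     Mat C;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(comm,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve
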
4417 @*/
4418 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4419 {
4420   PetscErrorCode ierr;
4421 
4422   PetscFunctionBegin;
4423   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4424   if (scall == MAT_INITIAL_MATRIX){
4425     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4426   }
4427   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4428   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4429   PetscFunctionReturn(0);
4430 }
4431 
4432 #undef __FUNCT__
4433 #define __FUNCT__ "MatFileSplit"
4434 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4435 {
4436   PetscErrorCode    ierr;
4437   PetscMPIInt       rank;
4438   PetscInt          m,N,i,rstart,nnz;
4439   size_t            len;
4440   const PetscInt    *indx;
4441   PetscViewer       out;
4442   char              *name;
4443   Mat               B;
4444   const PetscScalar *values;
4445 
4446   PetscFunctionBegin;
4447   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4448   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4449   /* Should this be the type of the diagonal block of A? */
4450   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4451   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4452   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4453   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4454   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4455   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4456   for (i=0;i<m;i++) {
4457     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4458     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4459     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4460   }
4461   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4462   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4463 
4464   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4465   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4466   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4467   sprintf(name,"%s.%d",outfile,rank);
4468   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4469   ierr = PetscFree(name);CHKERRQ(ierr);
4470   ierr = MatView(B,out);CHKERRQ(ierr);
4471   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4472   ierr = MatDestroy(&B);CHKERRQ(ierr);
4473   PetscFunctionReturn(0);
4474 }
4475 
4476 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4477 #undef __FUNCT__
4478 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4479 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4480 {
4481   PetscErrorCode       ierr;
4482   Mat_Merge_SeqsToMPI  *merge;
4483   PetscContainer       container;
4484 
4485   PetscFunctionBegin;
4486   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4487   if (container) {
4488     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4489     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4490     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4491     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4492     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4493     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4494     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4495     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4496     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4497     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4498     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4499     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4500     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4501     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4502     ierr = PetscFree(merge);CHKERRQ(ierr);
4503     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4504   }
4505   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4506   PetscFunctionReturn(0);
4507 }
4508 
4509 #include <../src/mat/utils/freespace.h>
4510 #include <petscbt.h>
4511 
4512 #undef __FUNCT__
4513 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4514 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4515 {
4516   PetscErrorCode       ierr;
4517   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4518   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4519   PetscMPIInt          size,rank,taga,*len_s;
4520   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4521   PetscInt             proc,m;
4522   PetscInt             **buf_ri,**buf_rj;
4523   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4524   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4525   MPI_Request          *s_waits,*r_waits;
4526   MPI_Status           *status;
4527   MatScalar            *aa=a->a;
4528   MatScalar            **abuf_r,*ba_i;
4529   Mat_Merge_SeqsToMPI  *merge;
4530   PetscContainer       container;
4531 
4532   PetscFunctionBegin;
4533   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4534 
4535   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4536   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4537 
4538   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4539   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4540 
4541   bi     = merge->bi;
4542   bj     = merge->bj;
4543   buf_ri = merge->buf_ri;
4544   buf_rj = merge->buf_rj;
4545 
4546   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4547   owners = merge->rowmap->range;
4548   len_s  = merge->len_s;
4549 
4550   /* send and recv matrix values */
4551   /*-----------------------------*/
4552   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4553   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4554 
4555   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4556   for (proc=0,k=0; proc<size; proc++){
4557     if (!len_s[proc]) continue;
4558     i = owners[proc];
4559     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4560     k++;
4561   }
4562 
4563   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4564   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4565   ierr = PetscFree(status);CHKERRQ(ierr);
4566 
4567   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4568   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4569 
4570   /* insert mat values of mpimat */
4571   /*----------------------------*/
4572   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4573   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4574 
4575   for (k=0; k<merge->nrecv; k++){
4576     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4577     nrows = *(buf_ri_k[k]);
4578     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4579     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4580   }
4581 
4582   /* set values of ba */
4583   m = merge->rowmap->n;
4584   for (i=0; i<m; i++) {
4585     arow = owners[rank] + i;
4586     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4587     bnzi = bi[i+1] - bi[i];
4588     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4589 
4590     /* add local non-zero vals of this proc's seqmat into ba */
4591     anzi = ai[arow+1] - ai[arow];
4592     aj   = a->j + ai[arow];
4593     aa   = a->a + ai[arow];
4594     nextaj = 0;
4595     for (j=0; nextaj<anzi; j++){
4596       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4597         ba_i[j] += aa[nextaj++];
4598       }
4599     }
4600 
4601     /* add received vals into ba */
4602     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4603       /* i-th row */
4604       if (i == *nextrow[k]) {
4605         anzi = *(nextai[k]+1) - *nextai[k];
4606         aj   = buf_rj[k] + *(nextai[k]);
4607         aa   = abuf_r[k] + *(nextai[k]);
4608         nextaj = 0;
4609         for (j=0; nextaj<anzi; j++){
4610           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4611             ba_i[j] += aa[nextaj++];
4612           }
4613         }
4614         nextrow[k]++; nextai[k]++;
4615       }
4616     }
4617     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4618   }
4619   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4620   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4621 
4622   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4623   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4624   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4625   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4626   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4627   PetscFunctionReturn(0);
4628 }
4629 
4630 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4631 
4632 #undef __FUNCT__
4633 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4634 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4635 {
4636   PetscErrorCode       ierr;
4637   Mat                  B_mpi;
4638   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4639   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4640   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4641   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4642   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4643   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4644   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4645   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4646   MPI_Status           *status;
4647   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4648   PetscBT              lnkbt;
4649   Mat_Merge_SeqsToMPI  *merge;
4650   PetscContainer       container;
4651 
4652   PetscFunctionBegin;
4653   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4654 
4655   /* make sure it is a PETSc comm */
4656   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4657   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4658   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4659 
4660   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4661   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4662 
4663   /* determine row ownership */
4664   /*---------------------------------------------------------*/
4665   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4666   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4667   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4668   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4669   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4670   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4671   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4672 
4673   m      = merge->rowmap->n;
4674   M      = merge->rowmap->N;
4675   owners = merge->rowmap->range;
4676 
4677   /* determine the number of messages to send, their lengths */
4678   /*---------------------------------------------------------*/
4679   len_s  = merge->len_s;
4680 
4681   len = 0;  /* length of buf_si[] */
4682   merge->nsend = 0;
4683   for (proc=0; proc<size; proc++){
4684     len_si[proc] = 0;
4685     if (proc == rank){
4686       len_s[proc] = 0;
4687     } else {
4688       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4689       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4690     }
4691     if (len_s[proc]) {
4692       merge->nsend++;
4693       nrows = 0;
4694       for (i=owners[proc]; i<owners[proc+1]; i++){
4695         if (ai[i+1] > ai[i]) nrows++;
4696       }
4697       len_si[proc] = 2*(nrows+1);
4698       len += len_si[proc];
4699     }
4700   }
4701 
4702   /* determine the number and length of messages to receive for ij-structure */
4703   /*-------------------------------------------------------------------------*/
4704   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4705   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4706 
4707   /* post the Irecv of j-structure */
4708   /*-------------------------------*/
4709   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4710   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4711 
4712   /* post the Isend of j-structure */
4713   /*--------------------------------*/
4714   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4715 
4716   for (proc=0, k=0; proc<size; proc++){
4717     if (!len_s[proc]) continue;
4718     i = owners[proc];
4719     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4720     k++;
4721   }
4722 
4723   /* receives and sends of j-structure are complete */
4724   /*------------------------------------------------*/
4725   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4726   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4727 
4728   /* send and recv i-structure */
4729   /*---------------------------*/
4730   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4731   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4732 
4733   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4734   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4735   for (proc=0,k=0; proc<size; proc++){
4736     if (!len_s[proc]) continue;
4737     /* form outgoing message for i-structure:
4738          buf_si[0]:                 nrows to be sent
4739                [1:nrows]:           row index (global)
4740                [nrows+1:2*nrows+1]: i-structure index
4741     */
4742     /*-------------------------------------------*/
4743     nrows = len_si[proc]/2 - 1;
4744     buf_si_i    = buf_si + nrows+1;
4745     buf_si[0]   = nrows;
4746     buf_si_i[0] = 0;
4747     nrows = 0;
4748     for (i=owners[proc]; i<owners[proc+1]; i++){
4749       anzi = ai[i+1] - ai[i];
4750       if (anzi) {
4751         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4752         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4753         nrows++;
4754       }
4755     }
4756     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4757     k++;
4758     buf_si += len_si[proc];
4759   }
4760 
4761   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4762   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4763 
4764   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4765   for (i=0; i<merge->nrecv; i++){
4766     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4767   }
4768 
4769   ierr = PetscFree(len_si);CHKERRQ(ierr);
4770   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4771   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4772   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4773   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4774   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4775   ierr = PetscFree(status);CHKERRQ(ierr);
4776 
4777   /* compute a local seq matrix in each processor */
4778   /*----------------------------------------------*/
4779   /* allocate bi array and free space for accumulating nonzero column info */
4780   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4781   bi[0] = 0;
4782 
4783   /* create and initialize a linked list */
4784   nlnk = N+1;
4785   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4786 
4787   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4788   len = 0;
4789   len  = ai[owners[rank+1]] - ai[owners[rank]];
4790   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4791   current_space = free_space;
4792 
4793   /* determine symbolic info for each local row */
4794   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4795 
4796   for (k=0; k<merge->nrecv; k++){
4797     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4798     nrows = *buf_ri_k[k];
4799     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4800     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4801   }
4802 
4803   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4804   len = 0;
4805   for (i=0;i<m;i++) {
4806     bnzi   = 0;
4807     /* add local non-zero cols of this proc's seqmat into lnk */
4808     arow   = owners[rank] + i;
4809     anzi   = ai[arow+1] - ai[arow];
4810     aj     = a->j + ai[arow];
4811     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4812     bnzi += nlnk;
4813     /* add received col data into lnk */
4814     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4815       if (i == *nextrow[k]) { /* i-th row */
4816         anzi = *(nextai[k]+1) - *nextai[k];
4817         aj   = buf_rj[k] + *nextai[k];
4818         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4819         bnzi += nlnk;
4820         nextrow[k]++; nextai[k]++;
4821       }
4822     }
4823     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4824 
4825     /* if free space is not available, make more free space */
4826     if (current_space->local_remaining<bnzi) {
4827       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4828       nspacedouble++;
4829     }
4830     /* copy data into free space, then initialize lnk */
4831     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4832     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4833 
4834     current_space->array           += bnzi;
4835     current_space->local_used      += bnzi;
4836     current_space->local_remaining -= bnzi;
4837 
4838     bi[i+1] = bi[i] + bnzi;
4839   }
4840 
4841   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4842 
4843   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4844   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4845   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4846 
4847   /* create symbolic parallel matrix B_mpi */
4848   /*---------------------------------------*/
4849   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4850   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4851   if (n==PETSC_DECIDE) {
4852     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4853   } else {
4854     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4855   }
4856   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4857   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4858   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4859   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4860   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4861 
4862   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4863   B_mpi->assembled     = PETSC_FALSE;
4864   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4865   merge->bi            = bi;
4866   merge->bj            = bj;
4867   merge->buf_ri        = buf_ri;
4868   merge->buf_rj        = buf_rj;
4869   merge->coi           = PETSC_NULL;
4870   merge->coj           = PETSC_NULL;
4871   merge->owners_co     = PETSC_NULL;
4872 
4873   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4874 
4875   /* attach the supporting struct to B_mpi for reuse */
4876   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4877   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4878   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4879   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4880   *mpimat = B_mpi;
4881 
4882   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4883   PetscFunctionReturn(0);
4884 }
4885 
4886 #undef __FUNCT__
4887 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4888 /*@C
4889       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4890                  matrices from each processor
4891 
4892     Collective on MPI_Comm
4893 
4894    Input Parameters:
4895 +    comm - the communicator the parallel matrix will live on
4896 .    seqmat - the input sequential matrices
4897 .    m - number of local rows (or PETSC_DECIDE)
4898 .    n - number of local columns (or PETSC_DECIDE)
4899 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4900 
4901    Output Parameter:
4902 .    mpimat - the parallel matrix generated
4903 
4904     Level: advanced
4905 
4906    Notes:
4907      The dimensions of the sequential matrix in each processor MUST be the same.
4908      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4909      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
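
   A sketch of typical use (each process contributes a SeqAIJ matrix Aloc with identical
   dimensions; names are illustrative) is

.vb
     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     /* ... refill Aloc with new values but the same nonzero pattern ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
.ve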
4910 @*/
4911 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4912 {
4913   PetscErrorCode   ierr;
4914   PetscMPIInt     size;
4915 
4916   PetscFunctionBegin;
4917   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4918   if (size == 1){
4919      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4920      if (scall == MAT_INITIAL_MATRIX){
4921        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4922      } else {
4923        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4924      }
4925      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4926      PetscFunctionReturn(0);
4927   }
4928   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4929   if (scall == MAT_INITIAL_MATRIX){
4930     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4931   }
4932   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4933   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4934   PetscFunctionReturn(0);
4935 }
4936 
4937 #undef __FUNCT__
4938 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4939 /*@
4940      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4941           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4942           with MatGetSize()
4943 
4944     Not Collective
4945 
4946    Input Parameters:
4947 +    A - the matrix
4948 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4949 
4950    Output Parameter:
4951 .    A_loc - the local sequential matrix generated
4952 
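   A sketch of typical use (names are illustrative) is

.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     /* ... use Aloc as an ordinary SeqAIJ matrix ... */
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve
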
4953     Level: developer
4954 
4955 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4956 
4957 @*/
4958 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4959 {
4960   PetscErrorCode  ierr;
4961   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4962   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4963   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4964   MatScalar       *aa=a->a,*ba=b->a,*cam;
4965   PetscScalar     *ca;
4966   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4967   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4968   PetscBool       match;
4969 
4970   PetscFunctionBegin;
4971   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4972   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4973   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4974   if (scall == MAT_INITIAL_MATRIX){
4975     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4976     ci[0] = 0;
4977     for (i=0; i<am; i++){
4978       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4979     }
4980     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4981     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4982     k = 0;
4983     for (i=0; i<am; i++) {
4984       ncols_o = bi[i+1] - bi[i];
4985       ncols_d = ai[i+1] - ai[i];
4986       /* off-diagonal portion of A */
4987       for (jo=0; jo<ncols_o; jo++) {
4988         col = cmap[*bj];
4989         if (col >= cstart) break;
4990         cj[k]   = col; bj++;
4991         ca[k++] = *ba++;
4992       }
4993       /* diagonal portion of A */
4994       for (j=0; j<ncols_d; j++) {
4995         cj[k]   = cstart + *aj++;
4996         ca[k++] = *aa++;
4997       }
4998       /* off-diagonal portion of A */
4999       for (j=jo; j<ncols_o; j++) {
5000         cj[k]   = cmap[*bj++];
5001         ca[k++] = *ba++;
5002       }
5003     }
5004     /* put together the new matrix */
5005     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5006     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5007     /* Since these are PETSc arrays, change flags to free them as necessary. */
5008     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5009     mat->free_a  = PETSC_TRUE;
5010     mat->free_ij = PETSC_TRUE;
5011     mat->nonew   = 0;
5012   } else if (scall == MAT_REUSE_MATRIX){
5013     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5014     ci = mat->i; cj = mat->j; cam = mat->a;
5015     for (i=0; i<am; i++) {
5016       /* off-diagonal portion of A */
5017       ncols_o = bi[i+1] - bi[i];
5018       for (jo=0; jo<ncols_o; jo++) {
5019         col = cmap[*bj];
5020         if (col >= cstart) break;
5021         *cam++ = *ba++; bj++;
5022       }
5023       /* diagonal portion of A */
5024       ncols_d = ai[i+1] - ai[i];
5025       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5026       /* off-diagonal portion of A */
5027       for (j=jo; j<ncols_o; j++) {
5028         *cam++ = *ba++; bj++;
5029       }
5030     }
5031   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5032   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5033   PetscFunctionReturn(0);
5034 }
5035 
5036 #undef __FUNCT__
5037 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5038 /*@C
5039      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5040 
5041     Not Collective
5042 
5043    Input Parameters:
5044 +    A - the matrix
5045 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5046 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5047 
5048    Output Parameter:
5049 .    A_loc - the local sequential matrix generated
5050 
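   A sketch of typical use, letting the routine select the rows and nonzero columns
   (names are illustrative), is

.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve
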
5051     Level: developer
5052 
5053 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5054 
5055 @*/
5056 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5057 {
5058   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5059   PetscErrorCode    ierr;
5060   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5061   IS                isrowa,iscola;
5062   Mat               *aloc;
5063   PetscBool       match;
5064 
5065   PetscFunctionBegin;
5066   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5067   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5068   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5069   if (!row){
5070     start = A->rmap->rstart; end = A->rmap->rend;
5071     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5072   } else {
5073     isrowa = *row;
5074   }
5075   if (!col){
5076     start = A->cmap->rstart;
5077     cmap  = a->garray;
5078     nzA   = a->A->cmap->n;
5079     nzB   = a->B->cmap->n;
5080     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5081     ncols = 0;
5082     for (i=0; i<nzB; i++) {
5083       if (cmap[i] < start) idx[ncols++] = cmap[i];
5084       else break;
5085     }
5086     imark = i;
5087     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5088     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5089     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5090   } else {
5091     iscola = *col;
5092   }
5093   if (scall != MAT_INITIAL_MATRIX){
5094     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5095     aloc[0] = *A_loc;
5096   }
5097   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5098   *A_loc = aloc[0];
5099   ierr = PetscFree(aloc);CHKERRQ(ierr);
5100   if (!row){
5101     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5102   }
5103   if (!col){
5104     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5105   }
5106   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5107   PetscFunctionReturn(0);
5108 }
5109 
5110 #undef __FUNCT__
5111 #define __FUNCT__ "MatGetBrowsOfAcols"
5112 /*@C
5113     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5114 
5115     Collective on Mat
5116 
5117    Input Parameters:
5118 +    A,B - the matrices in mpiaij format
5119 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5120 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5121 
5122    Output Parameter:
5123 +    rowb, colb - index sets of rows and columns of B to extract
5124 -    B_seq - the sequential matrix generated
5125 
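   A sketch of typical use (names are illustrative; the index sets created on the first
   call are reused on later calls) is

.vb
     IS  rowb = PETSC_NULL,colb = PETSC_NULL;
     Mat Bseq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     /* ... later, with the same nonzero pattern of A and B ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
.ve
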
5126     Level: developer
5127 
5128 @*/
5129 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5130 {
5131   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5132   PetscErrorCode    ierr;
5133   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5134   IS                isrowb,iscolb;
5135   Mat               *bseq=PETSC_NULL;
5136 
5137   PetscFunctionBegin;
5138   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5139     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5140   }
5141   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5142 
5143   if (scall == MAT_INITIAL_MATRIX){
5144     start = A->cmap->rstart;
5145     cmap  = a->garray;
5146     nzA   = a->A->cmap->n;
5147     nzB   = a->B->cmap->n;
5148     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5149     ncols = 0;
5150     for (i=0; i<nzB; i++) {  /* row < local row index */
5151       if (cmap[i] < start) idx[ncols++] = cmap[i];
5152       else break;
5153     }
5154     imark = i;
5155     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5156     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5157     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5158     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5159   } else {
5160     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5161     isrowb = *rowb; iscolb = *colb;
5162     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5163     bseq[0] = *B_seq;
5164   }
5165   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5166   *B_seq = bseq[0];
5167   ierr = PetscFree(bseq);CHKERRQ(ierr);
5168   if (!rowb){
5169     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5170   } else {
5171     *rowb = isrowb;
5172   }
5173   if (!colb){
5174     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5175   } else {
5176     *colb = iscolb;
5177   }
5178   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5179   PetscFunctionReturn(0);
5180 }
5181 
5182 #undef __FUNCT__
5183 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5184 /*
5185     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5186     of the OFF-DIAGONAL portion of local A
5187 
5188     Collective on Mat
5189 
5190    Input Parameters:
5191 +    A,B - the matrices in mpiaij format
5192 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5193 
5194    Output Parameter:
5195 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5196 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5197 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5198 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5199 
5200     Level: developer
5201 
5202 */
5203 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5204 {
5205   VecScatter_MPI_General *gen_to,*gen_from;
5206   PetscErrorCode         ierr;
5207   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5208   Mat_SeqAIJ             *b_oth;
5209   VecScatter             ctx=a->Mvctx;
5210   MPI_Comm               comm=((PetscObject)ctx)->comm;
5211   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5212   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5213   PetscScalar            *rvalues,*svalues;
5214   MatScalar              *b_otha,*bufa,*bufA;
5215   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5216   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5217   MPI_Status             *sstatus,rstatus;
5218   PetscMPIInt            jj;
5219   PetscInt               *cols,sbs,rbs;
5220   PetscScalar            *vals;
5221 
5222   PetscFunctionBegin;
5223   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5224     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5225   }
5226   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5227   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5228 
5229   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5230   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5231   rvalues  = gen_from->values; /* holds the length of receiving row */
5232   svalues  = gen_to->values;   /* holds the length of sending row */
5233   nrecvs   = gen_from->n;
5234   nsends   = gen_to->n;
5235 
5236   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5237   srow     = gen_to->indices;   /* local row index to be sent */
5238   sstarts  = gen_to->starts;
5239   sprocs   = gen_to->procs;
5240   sstatus  = gen_to->sstatus;
5241   sbs      = gen_to->bs;
5242   rstarts  = gen_from->starts;
5243   rprocs   = gen_from->procs;
5244   rbs      = gen_from->bs;
5245 
5246   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5247   if (scall == MAT_INITIAL_MATRIX){
5248     /* i-array */
5249     /*---------*/
5250     /*  post receives */
5251     for (i=0; i<nrecvs; i++){
5252       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5253       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5254       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5255     }
5256 
5257     /* pack the outgoing message */
5258     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5259     sstartsj[0] = 0;  rstartsj[0] = 0;
5260     len = 0; /* total length of j or a array to be sent */
5261     k = 0;
5262     for (i=0; i<nsends; i++){
5263       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5264       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5265       for (j=0; j<nrows; j++) {
5266         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5267         for (l=0; l<sbs; l++){
5268           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5269           rowlen[j*sbs+l] = ncols;
5270           len += ncols;
5271           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5272         }
5273         k++;
5274       }
5275       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5276       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5277     }
5278     /* recvs and sends of i-array are completed */
5279     i = nrecvs;
5280     while (i--) {
5281       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5282     }
5283     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5284 
5285     /* allocate buffers for sending j and a arrays */
5286     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5287     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5288 
5289     /* create i-array of B_oth */
5290     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5291     b_othi[0] = 0;
5292     len = 0; /* total length of j or a array to be received */
5293     k = 0;
5294     for (i=0; i<nrecvs; i++){
5295       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5296       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5297       for (j=0; j<nrows; j++) {
5298         b_othi[k+1] = b_othi[k] + rowlen[j];
5299         len += rowlen[j]; k++;
5300       }
5301       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5302     }
5303 
5304     /* allocate space for j and a arrays of B_oth */
5305     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5306     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5307 
5308     /* j-array */
5309     /*---------*/
5310     /*  post receives of j-array */
5311     for (i=0; i<nrecvs; i++){
5312       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5313       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5314     }
5315 
5316     /* pack the outgoing message j-array */
5317     k = 0;
5318     for (i=0; i<nsends; i++){
5319       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5320       bufJ = bufj+sstartsj[i];
5321       for (j=0; j<nrows; j++) {
5322         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5323         for (ll=0; ll<sbs; ll++){
5324           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5325           for (l=0; l<ncols; l++){
5326             *bufJ++ = cols[l];
5327           }
5328           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5329         }
5330       }
5331       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5332     }
5333 
5334     /* recvs and sends of j-array are completed */
5335     i = nrecvs;
5336     while (i--) {
5337       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5338     }
5339     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5340   } else if (scall == MAT_REUSE_MATRIX){
5341     sstartsj = *startsj_s;
5342     rstartsj = *startsj_r;
5343     bufa     = *bufa_ptr;
5344     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5345     b_otha   = b_oth->a;
5346   } else {
5347     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "MatReuse must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5348   }
5349 
5350   /* a-array */
5351   /*---------*/
5352   /*  post receives of a-array */
5353   for (i=0; i<nrecvs; i++){
5354     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5355     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5356   }
5357 
5358   /* pack the outgoing message a-array */
5359   k = 0;
5360   for (i=0; i<nsends; i++){
5361     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5362     bufA = bufa+sstartsj[i];
5363     for (j=0; j<nrows; j++) {
5364       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5365       for (ll=0; ll<sbs; ll++){
5366         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5367         for (l=0; l<ncols; l++){
5368           *bufA++ = vals[l];
5369         }
5370         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5371       }
5372     }
5373     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5374   }
5375   /* recvs and sends of a-array are completed */
5376   i = nrecvs;
5377   while (i--) {
5378     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5379   }
5380   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5381   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5382 
5383   if (scall == MAT_INITIAL_MATRIX){
5384     /* put together the new matrix */
5385     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5386 
5387     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5388     /* Since these are PETSc arrays, change flags to free them as necessary. */
5389     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5390     b_oth->free_a  = PETSC_TRUE;
5391     b_oth->free_ij = PETSC_TRUE;
5392     b_oth->nonew   = 0;
5393 
5394     ierr = PetscFree(bufj);CHKERRQ(ierr);
5395     if (!startsj_s || !bufa_ptr){
5396       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5397       ierr = PetscFree(bufa);CHKERRQ(ierr);
5398     } else {
5399       *startsj_s = sstartsj;
5400       *startsj_r = rstartsj;
5401       *bufa_ptr  = bufa;
5402     }
5403   }
5404   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5405   PetscFunctionReturn(0);
5406 }
5407 
5408 #undef __FUNCT__
5409 #define __FUNCT__ "MatGetCommunicationStructs"
5410 /*@C
5411   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5412 
5413   Not Collective
5414 
5415   Input Parameters:
5416 . A - The matrix in mpiaij format
5417 
5418   Output Parameter:
5419 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5420 . colmap - A map from global column index to local index into lvec
5421 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5422 
5423   Level: developer
5424 
5425 @*/
5426 #if defined (PETSC_USE_CTABLE)
5427 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5428 #else
5429 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5430 #endif
5431 {
5432   Mat_MPIAIJ *a;
5433 
5434   PetscFunctionBegin;
5435   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5436   PetscValidPointer(lvec, 2);
5437   PetscValidPointer(colmap, 3);
5438   PetscValidPointer(multScatter, 4);
5439   a = (Mat_MPIAIJ *) A->data;
5440   if (lvec) *lvec = a->lvec;
5441   if (colmap) *colmap = a->colmap;
5442   if (multScatter) *multScatter = a->Mvctx;
5443   PetscFunctionReturn(0);
5444 }
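
/*
   A minimal usage sketch for MatGetCommunicationStructs() (illustrative only; the matrix A
   and the VecScatterView() call are assumptions, not taken from this file).  The routine
   hands back pointers owned by the Mat, so nothing returned here should be destroyed by
   the caller.

     Vec        lvec;
     VecScatter mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
     ierr = VecScatterView(mvctx,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/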
5445 
5446 EXTERN_C_BEGIN
5447 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5448 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5449 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5450 EXTERN_C_END
5451 
5452 #undef __FUNCT__
5453 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5454 /*
5455     Computes C = A*B as (B'*A')', since computing A*B directly (a dense matrix times an AIJ matrix) is untenable
5456 
5457                n                       p                          p
5458         (              )       (              )         (                  )
5459       m (      A       )  *  n (       B      )   =   m (         C        )
5460         (              )       (              )         (                  )
5461 
5462 */
5463 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5464 {
5465   PetscErrorCode     ierr;
5466   Mat                At,Bt,Ct;
5467 
5468   PetscFunctionBegin;
5469   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5470   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5471   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5472   ierr = MatDestroy(&At);CHKERRQ(ierr);
5473   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5474   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5475   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5476   PetscFunctionReturn(0);
5477 }
5478 
5479 #undef __FUNCT__
5480 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5481 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5482 {
5483   PetscErrorCode ierr;
5484   PetscInt       m=A->rmap->n,n=B->cmap->n;
5485   Mat            Cmat;
5486 
5487   PetscFunctionBegin;
5488   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5489   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5490   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5491   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5492   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5493   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5494   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5495   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5496   *C   = Cmat;
5497   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5498   PetscFunctionReturn(0);
5499 }
5500 
5501 /* ----------------------------------------------------------------*/
5502 #undef __FUNCT__
5503 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5504 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5505 {
5506   PetscErrorCode ierr;
5507 
5508   PetscFunctionBegin;
5509   if (scall == MAT_INITIAL_MATRIX){
5510     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5511   }
5512   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5513   PetscFunctionReturn(0);
5514 }
5515 
5516 EXTERN_C_BEGIN
5517 #if defined(PETSC_HAVE_MUMPS)
5518 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5519 #endif
5520 #if defined(PETSC_HAVE_PASTIX)
5521 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5522 #endif
5523 #if defined(PETSC_HAVE_SUPERLU_DIST)
5524 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5525 #endif
5526 #if defined(PETSC_HAVE_SPOOLES)
5527 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5528 #endif
5529 EXTERN_C_END
5530 
5531 /*MC
5532    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5533 
5534    Options Database Keys:
5535 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5536 
5537   Level: beginner
5538 
5539 .seealso: MatCreateAIJ()
5540 M*/
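
/*
   A small sketch of selecting this type at run time (assumed user code, not taken from this
   file; M and N stand for global sizes chosen by the user):

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);                        (picks up -mat_type mpiaij)
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/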
5541 
5542 EXTERN_C_BEGIN
5543 #undef __FUNCT__
5544 #define __FUNCT__ "MatCreate_MPIAIJ"
5545 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5546 {
5547   Mat_MPIAIJ     *b;
5548   PetscErrorCode ierr;
5549   PetscMPIInt    size;
5550 
5551   PetscFunctionBegin;
5552   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5553   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5554   B->data         = (void*)b;
5555   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5556   B->assembled    = PETSC_FALSE;
5557   B->insertmode   = NOT_SET_VALUES;
5558   b->size         = size;
5559   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5560 
5561   /* build cache for off array entries formed */
5562   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5563   b->donotstash  = PETSC_FALSE;
5564   b->colmap      = 0;
5565   b->garray      = 0;
5566   b->roworiented = PETSC_TRUE;
5567 
5568   /* stuff used for matrix vector multiply */
5569   b->lvec      = PETSC_NULL;
5570   b->Mvctx     = PETSC_NULL;
5571 
5572   /* stuff for MatGetRow() */
5573   b->rowindices   = 0;
5574   b->rowvalues    = 0;
5575   b->getrowactive = PETSC_FALSE;
5576 
5577   /* flexible pointer used in CUSP/CUSPARSE classes */
5578   b->spptr        = PETSC_NULL;
5579 #if defined(PETSC_HAVE_SPOOLES)
5580   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5581                                      "MatGetFactor_mpiaij_spooles",
5582                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5583 #endif
5584 #if defined(PETSC_HAVE_MUMPS)
5585   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5586                                      "MatGetFactor_aij_mumps",
5587                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5588 #endif
5589 #if defined(PETSC_HAVE_PASTIX)
5590   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5591                                      "MatGetFactor_mpiaij_pastix",
5592                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5593 #endif
5594 #if defined(PETSC_HAVE_SUPERLU_DIST)
5595   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5596                                      "MatGetFactor_mpiaij_superlu_dist",
5597                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5598 #endif
5599   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5600                                      "MatStoreValues_MPIAIJ",
5601                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5602   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5603                                      "MatRetrieveValues_MPIAIJ",
5604                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5605   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5606                                      "MatGetDiagonalBlock_MPIAIJ",
5607                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5608   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5609                                      "MatIsTranspose_MPIAIJ",
5610                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5612                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5613                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5614   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5615                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5616                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5617   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5618                                      "MatDiagonalScaleLocal_MPIAIJ",
5619                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5620   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5621                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5622                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5623   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5624                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5625                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5626   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5627                                      "MatConvert_MPIAIJ_MPISBAIJ",
5628                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5629   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5630                                      "MatMatMult_MPIDense_MPIAIJ",
5631                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5632   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5633                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5634                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5635   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5636                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5637                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5638   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5639   PetscFunctionReturn(0);
5640 }
5641 EXTERN_C_END
5642 
5643 #undef __FUNCT__
5644 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5645 /*@
5646      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5647          and "off-diagonal" parts of the matrix in CSR format.
5648 
5649    Collective on MPI_Comm
5650 
5651    Input Parameters:
5652 +  comm - MPI communicator
5653 .  m - number of local rows (Cannot be PETSC_DECIDE)
5654 .  n - This value should be the same as the local size used in creating the
5655        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5656        it calculated if N is given). For square matrices n is almost always m.
5657 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5658 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5659 .   i - row indices for "diagonal" portion of matrix
5660 .   j - column indices
5661 .   a - matrix values
5662 .   oi - row indices for "off-diagonal" portion of matrix
5663 .   oj - column indices
5664 -   oa - matrix values
5665 
5666    Output Parameter:
5667 .   mat - the matrix
5668 
5669    Level: advanced
5670 
5671    Notes:
5672        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5673        must free the arrays only after the matrix has been destroyed, not before.
5674 
5675        The i and j indices are 0 based
5676 
5677        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5678 
5679        This sets local rows and cannot be used to set off-processor values.
5680 
5681        You cannot later use MatSetValues() to change values in this matrix.
5682 
5683 .keywords: matrix, aij, compressed row, sparse, parallel
5684 
5685 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5686           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5687 @*/
5688 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5689 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5690 {
5691   PetscErrorCode ierr;
5692   Mat_MPIAIJ     *maij;
5693 
5694   PetscFunctionBegin;
5695   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5696   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5697   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5698   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5699   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5700   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5701   maij = (Mat_MPIAIJ*) (*mat)->data;
5702   maij->donotstash     = PETSC_TRUE;
5703   (*mat)->preallocated = PETSC_TRUE;
5704 
5705   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5706   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5707 
5708   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5709   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5710 
5711   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5712   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5713   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5714   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5715 
5716   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5717   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5718   PetscFunctionReturn(0);
5719 }
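
/*
   A hedged usage sketch (assumes exactly two MPI processes; the 2x2 matrix [2 -1; -1 2] and
   all array contents below are illustrative, not taken from this file).  Each process owns
   one row and one column; the off-diagonal entry is given with its global column index,
   matching the MatCreateSeqAIJWithArrays() call on (*mat)->cmap->N columns above:

     PetscMPIInt rank;
     PetscInt    i[]  = {0,1},  j[]  = {0};
     PetscScalar a[]  = {2.0};
     PetscInt    oi[] = {0,1},  oj[1];
     PetscScalar oa[] = {-1.0};
     Mat         A;

     ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     oj[0] = 1 - rank;                          (global column of the single off-diagonal entry)
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   The six arrays must remain valid until MatDestroy(&A), since they are used in place rather
   than copied.
*/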
5720 
5721 /*
5722     Special version for direct calls from Fortran
5723 */
5724 #include <petsc-private/fortranimpl.h>
5725 
5726 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5727 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5728 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5729 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5730 #endif
5731 
5732 /* Change these macros so they can be used in a void function */
5733 #undef CHKERRQ
5734 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5735 #undef SETERRQ2
5736 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5737 #undef SETERRQ3
5738 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5739 #undef SETERRQ
5740 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5741 
5742 EXTERN_C_BEGIN
5743 #undef __FUNCT__
5744 #define __FUNCT__ "matsetvaluesmpiaij_"
5745 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5746 {
5747   Mat             mat = *mmat;
5748   PetscInt        m = *mm, n = *mn;
5749   InsertMode      addv = *maddv;
5750   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5751   PetscScalar     value;
5752   PetscErrorCode  ierr;
5753 
5754   MatCheckPreallocated(mat,1);
5755   if (mat->insertmode == NOT_SET_VALUES) {
5756     mat->insertmode = addv;
5757   }
5758 #if defined(PETSC_USE_DEBUG)
5759   else if (mat->insertmode != addv) {
5760     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5761   }
5762 #endif
5763   {
5764   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5765   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5766   PetscBool       roworiented = aij->roworiented;
5767 
5768   /* Some Variables required in the macro */
5769   Mat             A = aij->A;
5770   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5771   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5772   MatScalar       *aa = a->a;
5773   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5774   Mat             B = aij->B;
5775   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5776   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5777   MatScalar       *ba = b->a;
5778 
5779   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5780   PetscInt        nonew = a->nonew;
5781   MatScalar       *ap1,*ap2;
5782 
5783   PetscFunctionBegin;
5784   for (i=0; i<m; i++) {
5785     if (im[i] < 0) continue;
5786 #if defined(PETSC_USE_DEBUG)
5787     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5788 #endif
5789     if (im[i] >= rstart && im[i] < rend) {
5790       row      = im[i] - rstart;
5791       lastcol1 = -1;
5792       rp1      = aj + ai[row];
5793       ap1      = aa + ai[row];
5794       rmax1    = aimax[row];
5795       nrow1    = ailen[row];
5796       low1     = 0;
5797       high1    = nrow1;
5798       lastcol2 = -1;
5799       rp2      = bj + bi[row];
5800       ap2      = ba + bi[row];
5801       rmax2    = bimax[row];
5802       nrow2    = bilen[row];
5803       low2     = 0;
5804       high2    = nrow2;
5805 
5806       for (j=0; j<n; j++) {
5807         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5808         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5809         if (in[j] >= cstart && in[j] < cend){
5810           col = in[j] - cstart;
5811           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5812         } else if (in[j] < 0) continue;
5813 #if defined(PETSC_USE_DEBUG)
5814         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5815 #endif
5816         else {
5817           if (mat->was_assembled) {
5818             if (!aij->colmap) {
5819               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5820             }
5821 #if defined (PETSC_USE_CTABLE)
5822             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5823             col--;
5824 #else
5825             col = aij->colmap[in[j]] - 1;
5826 #endif
5827             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5828               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5829               col =  in[j];
5830               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5831               B = aij->B;
5832               b = (Mat_SeqAIJ*)B->data;
5833               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5834               rp2      = bj + bi[row];
5835               ap2      = ba + bi[row];
5836               rmax2    = bimax[row];
5837               nrow2    = bilen[row];
5838               low2     = 0;
5839               high2    = nrow2;
5840               bm       = aij->B->rmap->n;
5841               ba = b->a;
5842             }
5843           } else col = in[j];
5844           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5845         }
5846       }
5847     } else {
5848       if (!aij->donotstash) {
5849         if (roworiented) {
5850           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5851         } else {
5852           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5853         }
5854       }
5855     }
5856   }}
5857   PetscFunctionReturnVoid();
5858 }
5859 EXTERN_C_END
5860 
5861