xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 7a3afff6730f82e9315b80da4b583bc441ed2b08)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically
18    switches over to using inodes when enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
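/*
   Editor's sketch (not part of the original file): the recommendation above amounts to calling
   both preallocation routines after MatSetType(); only the call matching the communicator size
   takes effect.  The per-row estimates (5 diagonal, 2 off-diagonal) are illustrative only.

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/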
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
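/*
   Editor's usage sketch (assuming the public MatFindNonzeroRows() interface that dispatches to the
   routine above): keptrows is returned as NULL when no process owns an entirely zero row; otherwise
   it lists the caller's locally owned rows that contain at least one nonzero value.

      IS keptrows;
      ierr = MatFindNonzeroRows(A,&keptrows);CHKERRQ(ierr);
      if (keptrows) {
        ierr = ISView(keptrows,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
        ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
      }
*/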
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
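/*
   Editor's usage sketch: the column norms are reduced over the whole communicator, so every
   process passes an array sized for the *global* number of columns.

      PetscReal *norms;
      PetscInt  N;
      ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
      ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
      ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
      ierr = PetscFree(norms);CHKERRQ(ierr);
*/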
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 
178     Only for square matrices
179 */
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     if (!rank) {
204       bses[0] = gmat->rmap->bs;
205       bses[1] = gmat->cmap->bs;
206     }
207     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
208     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
209     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
210     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
211     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
212     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) {
215       rowners[i] += rowners[i-1];
216     }
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine the diagonal and off-diagonal nonzero counts */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
231       jj = 0;
232       for (i=0; i<m; i++) {
233 	for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236 	  jj++;
237 	}
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the diagonal and off-diagonal nonzero counts */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj = 0;
266       for (i=0; i<m; i++) {
267 	for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270 	  jj++;
271 	}
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt  = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299     *inmat = mat;
300   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
301     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
302     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
303     mat   = *inmat;
304     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
305     if (!rank) {
306       /* send numerical values to other processes */
307       gmata = (Mat_SeqAIJ*) gmat->data;
308       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
309       gmataa = gmata->a;
310       for (i=1; i<size; i++) {
311         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
312         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
313       }
314       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
315     } else {
316       /* receive numerical values from process 0 */
317       nz   = Ad->nz + Ao->nz;
318       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
319       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
320     }
321     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
322     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
323     ad = Ad->a;
324     ao = Ao->a;
325     if (mat->rmap->n) {
326       i  = 0;
327       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
328       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
329     }
330     for (i=1; i<mat->rmap->n; i++) {
331       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     i--;
335     if (mat->rmap->n) {
336       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
337     }
338     if (rank) {
339       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
340     }
341   }
342   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   CHKMEMQ;
345   PetscFunctionReturn(0);
346 }
347 
348 /*
349   Local utility routine that creates a mapping from the global column
350 number to the local number in the off-diagonal part of the local
351 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
352 a slightly higher hash-table cost; without it, it is not scalable (each process
353 holds an order-N integer array) but access is fast.
354 */
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined (PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++){
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
373   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
374   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
375 #endif
376   PetscFunctionReturn(0);
377 }
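/*
   Editor's note: later routines translate a global column index gcol into a local column index
   lcol of the off-diagonal block B using the colmap built above; entries are stored shifted by
   one so that zero can mean "column not present".  A lookup sketch:

   #if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif
      (lcol < 0 means gcol does not appear in the off-diagonal part)
*/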
378 
379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
380 { \
381     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
382     lastcol1 = col;\
383     while (high1-low1 > 5) { \
384       t = (low1+high1)/2; \
385       if (rp1[t] > col) high1 = t; \
386       else             low1  = t; \
387     } \
388       for (_i=low1; _i<high1; _i++) { \
389         if (rp1[_i] > col) break; \
390         if (rp1[_i] == col) { \
391           if (addv == ADD_VALUES) ap1[_i] += value;   \
392           else                    ap1[_i] = value; \
393           goto a_noinsert; \
394         } \
395       }  \
396       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
397       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
399       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
400       N = nrow1++ - 1; a->nz++; high1++; \
401       /* shift up all the later entries in this row */ \
402       for (ii=N; ii>=_i; ii--) { \
403         rp1[ii+1] = rp1[ii]; \
404         ap1[ii+1] = ap1[ii]; \
405       } \
406       rp1[_i] = col;  \
407       ap1[_i] = value;  \
408       a_noinsert: ; \
409       ailen[row] = nrow1; \
410 }
411 
412 
413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
414 { \
415     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
416     lastcol2 = col;\
417     while (high2-low2 > 5) { \
418       t = (low2+high2)/2; \
419       if (rp2[t] > col) high2 = t; \
420       else             low2  = t; \
421     } \
422     for (_i=low2; _i<high2; _i++) {		\
423       if (rp2[_i] > col) break;			\
424       if (rp2[_i] == col) {			      \
425 	if (addv == ADD_VALUES) ap2[_i] += value;     \
426 	else                    ap2[_i] = value;      \
427 	goto b_noinsert;			      \
428       }						      \
429     }							      \
430     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
431     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
432     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
433     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
434     N = nrow2++ - 1; b->nz++; high2++;					\
435     /* shift up all the later entries in this row */			\
436     for (ii=N; ii>=_i; ii--) {						\
437       rp2[ii+1] = rp2[ii];						\
438       ap2[ii+1] = ap2[ii];						\
439     }									\
440     rp2[_i] = col;							\
441     ap2[_i] = value;							\
442     b_noinsert: ;								\
443     bilen[row] = nrow2;							\
444 }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros above */
486   Mat            A = aij->A;
487   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
488   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar      *aa = a->a;
490   PetscBool      ignorezeroentries = a->ignorezeroentries;
491   Mat            B = aij->B;
492   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
493   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar      *ba = b->a;
495 
496   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt       nonew;
498   MatScalar      *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   if (v) PetscValidScalarPointer(v,6);
502   for (i=0; i<m; i++) {
503     if (im[i] < 0) continue;
504 #if defined(PETSC_USE_DEBUG)
505     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
506 #endif
507     if (im[i] >= rstart && im[i] < rend) {
508       row      = im[i] - rstart;
509       lastcol1 = -1;
510       rp1      = aj + ai[row];
511       ap1      = aa + ai[row];
512       rmax1    = aimax[row];
513       nrow1    = ailen[row];
514       low1     = 0;
515       high1    = nrow1;
516       lastcol2 = -1;
517       rp2      = bj + bi[row];
518       ap2      = ba + bi[row];
519       rmax2    = bimax[row];
520       nrow2    = bilen[row];
521       low2     = 0;
522       high2    = nrow2;
523 
524       for (j=0; j<n; j++) {
525         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend){
528           col = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined (PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542 	    col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B = aij->B;
551               b = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2      = bj + bi[row];
554               ap2      = ba + bi[row];
555               rmax2    = bimax[row];
556               nrow2    = bilen[row];
557               low2     = 0;
558               high2    = nrow2;
559               bm       = aij->B->rmap->n;
560               ba = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
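/*
   Editor's usage sketch: callers pass *global* row and column indices; entries owned by another
   process are stashed above and exchanged during assembly.

      PetscInt    row = 0,cols[2] = {0,1};
      PetscScalar vals[2] = {2.0,-1.0};
      ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/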
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend){
601           col = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined (PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col --;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else {
620       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
621     }
622   }
623   PetscFunctionReturn(0);
624 }
625 
626 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
630 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
631 {
632   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
633   PetscErrorCode ierr;
634   PetscInt       nstash,reallocs;
635   InsertMode     addv;
636 
637   PetscFunctionBegin;
638   if (aij->donotstash || mat->nooffprocentries) {
639     PetscFunctionReturn(0);
640   }
641 
642   /* make sure all processors are either in INSERTMODE or ADDMODE */
643   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
644   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
645   mat->insertmode = addv; /* in case this processor had no cache */
646 
647   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
648   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
649   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
656 {
657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
658   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
659   PetscErrorCode ierr;
660   PetscMPIInt    n;
661   PetscInt       i,j,rstart,ncols,flg;
662   PetscInt       *row,*col;
663   PetscBool      other_disassembled;
664   PetscScalar    *val;
665   InsertMode     addv = mat->insertmode;
666 
667   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
668   PetscFunctionBegin;
669   if (!aij->donotstash && !mat->nooffprocentries) {
670     while (1) {
671       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
672       if (!flg) break;
673 
674       for (i=0; i<n;) {
675         /* Now identify the consecutive vals belonging to the same row */
676         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
677         if (j < n) ncols = j-i;
678         else       ncols = n-i;
679         /* Now assemble all these values with a single function call */
680         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled; if so we must
690      also disassemble ourselves, in order that we may reassemble. */
691   /*
692      if nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled thus we can skip this stuff
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
706   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
707   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
708 
709   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718   PetscFunctionReturn(0);
719 }
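/*
   Editor's usage sketch: MAT_FLUSH_ASSEMBLY is used between phases that mix insert and add modes,
   and MAT_FINAL_ASSEMBLY before the matrix is used.

      ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&col,&w,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/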
720 
721 #undef __FUNCT__
722 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
723 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
724 {
725   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
726   PetscErrorCode ierr;
727 
728   PetscFunctionBegin;
729   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
730   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
731   PetscFunctionReturn(0);
732 }
733 
734 #undef __FUNCT__
735 #define __FUNCT__ "MatZeroRows_MPIAIJ"
736 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
737 {
738   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
739   PetscErrorCode    ierr;
740   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
741   PetscInt          i,*owners = A->rmap->range;
742   PetscInt          *nprocs,j,idx,nsends,row;
743   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
744   PetscInt          *rvalues,count,base,slen,*source;
745   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
746   MPI_Comm          comm = ((PetscObject)A)->comm;
747   MPI_Request       *send_waits,*recv_waits;
748   MPI_Status        recv_status,*send_status;
749   const PetscScalar *xx;
750   PetscScalar       *bb;
751 #if defined(PETSC_DEBUG)
752   PetscBool      found = PETSC_FALSE;
753 #endif
754 
755   PetscFunctionBegin;
756   /*  first count number of contributors to each processor */
757   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
758   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
759   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
760   j = 0;
761   for (i=0; i<N; i++) {
762     if (lastidx > (idx = rows[i])) j = 0;
763     lastidx = idx;
764     for (; j<size; j++) {
765       if (idx >= owners[j] && idx < owners[j+1]) {
766         nprocs[2*j]++;
767         nprocs[2*j+1] = 1;
768         owner[i] = j;
769 #if defined(PETSC_DEBUG)
770         found = PETSC_TRUE;
771 #endif
772         break;
773       }
774     }
775 #if defined(PETSC_DEBUG)
776     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
777     found = PETSC_FALSE;
778 #endif
779   }
780   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
781 
782   if (A->nooffproczerorows) {
783     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
784     nrecvs = nsends;
785     nmax   = N;
786   } else {
787     /* inform other processors of number of messages and max length*/
788     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
789   }
790 
791   /* post receives:   */
792   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
793   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
794   for (i=0; i<nrecvs; i++) {
795     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
796   }
797 
798   /* do sends:
799       1) starts[i] gives the starting index in svalues for stuff going to
800          the ith processor
801   */
802   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
803   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
804   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
805   starts[0] = 0;
806   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
807   for (i=0; i<N; i++) {
808     svalues[starts[owner[i]]++] = rows[i];
809   }
810 
811   starts[0] = 0;
812   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
813   count = 0;
814   for (i=0; i<size; i++) {
815     if (nprocs[2*i+1]) {
816       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
817     }
818   }
819   ierr = PetscFree(starts);CHKERRQ(ierr);
820 
821   base = owners[rank];
822 
823   /*  wait on receives */
824   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
825   count  = nrecvs; slen = 0;
826   while (count) {
827     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
828     /* unpack receives into our local space */
829     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
830     source[imdex]  = recv_status.MPI_SOURCE;
831     lens[imdex]    = n;
832     slen          += n;
833     count--;
834   }
835   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
836 
837   /* move the data into the send scatter */
838   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
839   count = 0;
840   for (i=0; i<nrecvs; i++) {
841     values = rvalues + i*nmax;
842     for (j=0; j<lens[i]; j++) {
843       lrows[count++] = values[j] - base;
844     }
845   }
846   ierr = PetscFree(rvalues);CHKERRQ(ierr);
847   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
848   ierr = PetscFree(owner);CHKERRQ(ierr);
849   ierr = PetscFree(nprocs);CHKERRQ(ierr);
850 
851   /* fix right hand side if needed */
852   if (x && b) {
853     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
854     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
855     for (i=0; i<slen; i++) {
856       bb[lrows[i]] = diag*xx[lrows[i]];
857     }
858     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
859     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
860   }
861   /*
862         Zero the required rows. If the "diagonal block" of the matrix
863      is square and the user wishes to set the diagonal we use separate
864      code so that MatSetValues() is not called for each diagonal entry, which would
865      allocate new memory, triggering many mallocs and slowing things down.
866 
867   */
868   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
869   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
870   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
871     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
872   } else if (diag != 0.0) {
873     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
874     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
875       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
876 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
877     }
878     for (i = 0; i < slen; i++) {
879       row  = lrows[i] + rstart;
880       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
881     }
882     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
883     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
884   } else {
885     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
886   }
887   ierr = PetscFree(lrows);CHKERRQ(ierr);
888 
889   /* wait on sends */
890   if (nsends) {
891     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
892     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
893     ierr = PetscFree(send_status);CHKERRQ(ierr);
894   }
895   ierr = PetscFree(send_waits);CHKERRQ(ierr);
896   ierr = PetscFree(svalues);CHKERRQ(ierr);
897   PetscFunctionReturn(0);
898 }
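/*
   Editor's usage sketch: zero two (global) rows, put 1.0 on their diagonals, and, because x and b
   are supplied, adjust the right-hand side so those rows of the solution keep the values in x.

      PetscInt rows[2] = {0,5};
      ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/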
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
902 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
903 {
904   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
905   PetscErrorCode    ierr;
906   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
907   PetscInt          i,*owners = A->rmap->range;
908   PetscInt          *nprocs,j,idx,nsends;
909   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
910   PetscInt          *rvalues,count,base,slen,*source;
911   PetscInt          *lens,*lrows,*values,m;
912   MPI_Comm          comm = ((PetscObject)A)->comm;
913   MPI_Request       *send_waits,*recv_waits;
914   MPI_Status        recv_status,*send_status;
915   const PetscScalar *xx;
916   PetscScalar       *bb,*mask;
917   Vec               xmask,lmask;
918   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
919   const PetscInt    *aj, *ii,*ridx;
920   PetscScalar       *aa;
921 #if defined(PETSC_DEBUG)
922   PetscBool         found = PETSC_FALSE;
923 #endif
924 
925   PetscFunctionBegin;
926   /*  first count number of contributors to each processor */
927   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
928   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
929   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
930   j = 0;
931   for (i=0; i<N; i++) {
932     if (lastidx > (idx = rows[i])) j = 0;
933     lastidx = idx;
934     for (; j<size; j++) {
935       if (idx >= owners[j] && idx < owners[j+1]) {
936         nprocs[2*j]++;
937         nprocs[2*j+1] = 1;
938         owner[i] = j;
939 #if defined(PETSC_DEBUG)
940         found = PETSC_TRUE;
941 #endif
942         break;
943       }
944     }
945 #if defined(PETSC_DEBUG)
946     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
947     found = PETSC_FALSE;
948 #endif
949   }
950   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
951 
952   /* inform other processors of number of messages and max length*/
953   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
954 
955   /* post receives:   */
956   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
957   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
958   for (i=0; i<nrecvs; i++) {
959     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
960   }
961 
962   /* do sends:
963       1) starts[i] gives the starting index in svalues for stuff going to
964          the ith processor
965   */
966   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
967   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
968   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
969   starts[0] = 0;
970   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
971   for (i=0; i<N; i++) {
972     svalues[starts[owner[i]]++] = rows[i];
973   }
974 
975   starts[0] = 0;
976   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
977   count = 0;
978   for (i=0; i<size; i++) {
979     if (nprocs[2*i+1]) {
980       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
981     }
982   }
983   ierr = PetscFree(starts);CHKERRQ(ierr);
984 
985   base = owners[rank];
986 
987   /*  wait on receives */
988   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
989   count  = nrecvs; slen = 0;
990   while (count) {
991     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
992     /* unpack receives into our local space */
993     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
994     source[imdex]  = recv_status.MPI_SOURCE;
995     lens[imdex]    = n;
996     slen          += n;
997     count--;
998   }
999   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1000 
1001   /* move the data into the send scatter */
1002   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1003   count = 0;
1004   for (i=0; i<nrecvs; i++) {
1005     values = rvalues + i*nmax;
1006     for (j=0; j<lens[i]; j++) {
1007       lrows[count++] = values[j] - base;
1008     }
1009   }
1010   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1011   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1012   ierr = PetscFree(owner);CHKERRQ(ierr);
1013   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1014   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1015 
1016   /* zero diagonal part of matrix */
1017   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1018 
1019   /* handle off diagonal part of matrix */
1020   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1021   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1022   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1023   for (i=0; i<slen; i++) {
1024     bb[lrows[i]] = 1;
1025   }
1026   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1027   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1030   if (x) {
1031     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1034     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1035   }
1036   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1037 
1038   /* remove zeroed rows of off diagonal matrix */
1039   ii = aij->i;
1040   for (i=0; i<slen; i++) {
1041     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1042   }
1043 
1044   /* loop over all elements of off process part of matrix zeroing removed columns*/
1045   if (aij->compressedrow.use){
1046     m    = aij->compressedrow.nrows;
1047     ii   = aij->compressedrow.i;
1048     ridx = aij->compressedrow.rindex;
1049     for (i=0; i<m; i++){
1050       n   = ii[i+1] - ii[i];
1051       aj  = aij->j + ii[i];
1052       aa  = aij->a + ii[i];
1053 
1054       for (j=0; j<n; j++) {
1055         if (PetscAbsScalar(mask[*aj])) {
1056           if (b) bb[*ridx] -= *aa*xx[*aj];
1057           *aa        = 0.0;
1058         }
1059         aa++;
1060         aj++;
1061       }
1062       ridx++;
1063     }
1064   } else { /* do not use compressed row format */
1065     m = l->B->rmap->n;
1066     for (i=0; i<m; i++) {
1067       n   = ii[i+1] - ii[i];
1068       aj  = aij->j + ii[i];
1069       aa  = aij->a + ii[i];
1070       for (j=0; j<n; j++) {
1071         if (PetscAbsScalar(mask[*aj])) {
1072           if (b) bb[i] -= *aa*xx[*aj];
1073           *aa    = 0.0;
1074         }
1075         aa++;
1076         aj++;
1077       }
1078     }
1079   }
1080   if (x) {
1081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1082     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1083   }
1084   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1085   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1086   ierr = PetscFree(lrows);CHKERRQ(ierr);
1087 
1088   /* wait on sends */
1089   if (nsends) {
1090     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1091     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1092     ierr = PetscFree(send_status);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1095   ierr = PetscFree(svalues);CHKERRQ(ierr);
1096 
1097   PetscFunctionReturn(0);
1098 }
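/*
   Editor's usage sketch: like MatZeroRows() above, but the listed columns are zeroed as well, and
   the right-hand side is corrected for the eliminated column entries when x and b are given.

      PetscInt rc[2] = {0,5};
      ierr = MatZeroRowsColumns(A,2,rc,1.0,x,b);CHKERRQ(ierr);
*/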
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
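/*
   Editor's note: the multiply above overlaps communication with computation.  In block form the
   locally owned part of the product is

      y_local = A_d * x_local + A_o * x_ghost

   where A_d (a->A) is the diagonal block, A_o (a->B) is the off-diagonal block stored with
   compressed local column numbering, and x_ghost (a->lvec) is gathered by the scatter a->Mvctx.
*/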
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually
1163        added into yy until the VecScatterEnd() on the next line */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* the values were actually received in the Begin(), but we still need to call this no-op End() */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 EXTERN_C_BEGIN
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1179 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1180 {
1181   MPI_Comm       comm;
1182   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1183   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1184   IS             Me,Notme;
1185   PetscErrorCode ierr;
1186   PetscInt       M,N,first,last,*notme,i;
1187   PetscMPIInt    size;
1188 
1189   PetscFunctionBegin;
1190 
1191   /* Easy test: the diagonal blocks must be transposes of each other */
1192   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1193   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1194   if (!*f) PetscFunctionReturn(0);
1195   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1196   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1197   if (size == 1) PetscFunctionReturn(0);
1198 
1199   /* Hard test: the off-diagonal blocks. This requires a MatGetSubMatrices(). */
1200   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1201   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1202   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1203   for (i=0; i<first; i++) notme[i] = i;
1204   for (i=last; i<M; i++) notme[i-last+first] = i;
1205   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1206   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1207   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1208   Aoff = Aoffs[0];
1209   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1210   Boff = Boffs[0];
1211   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1212   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1213   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1214   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1215   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1216   ierr = PetscFree(notme);CHKERRQ(ierr);
1217    PetscFunctionReturn(0);
1218 }
1219 EXTERN_C_END
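/*
   Editor's usage sketch: the test above is reached through the generic interface.

      PetscBool flg;
      ierr = MatIsTranspose(A,B,1.e-10,&flg);CHKERRQ(ierr);
      if (flg) {ierr = PetscPrintf(PETSC_COMM_WORLD,"B is the transpose of A\n");CHKERRQ(ierr);}
*/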
1220 
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1223 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1224 {
1225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1226   PetscErrorCode ierr;
1227 
1228   PetscFunctionBegin;
1229   /* do nondiagonal part */
1230   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1231   /* send it on its way */
1232   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1233   /* do local part */
1234   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1235   /* receive remote parts */
1236   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 /*
1241   This only works correctly for square matrices where the local subblock a->A is the
1242    diagonal block
1243 */
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1246 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1247 {
1248   PetscErrorCode ierr;
1249   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1250 
1251   PetscFunctionBegin;
1252   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1253   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1254   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatScale_MPIAIJ"
1260 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1261 {
1262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1267   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatDestroy_MPIAIJ"
1273 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1274 {
1275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1276   PetscErrorCode ierr;
1277 
1278   PetscFunctionBegin;
1279 #if defined(PETSC_USE_LOG)
1280   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1281 #endif
1282   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1283   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1284   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1285   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1286 #if defined (PETSC_USE_CTABLE)
1287   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1288 #else
1289   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1290 #endif
1291   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1292   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1293   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1294   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1295   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1296   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1312 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1313 {
1314   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1315   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1316   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1317   PetscErrorCode    ierr;
1318   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1319   int               fd;
1320   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1321   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1322   PetscScalar       *column_values;
1323   PetscInt          message_count,flowcontrolcount;
1324   FILE              *file;
1325 
1326   PetscFunctionBegin;
1327   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1328   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1329   nz   = A->nz + B->nz;
1330   if (!rank) {
1331     header[0] = MAT_FILE_CLASSID;
1332     header[1] = mat->rmap->N;
1333     header[2] = mat->cmap->N;
1334     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1335     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1336     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1337     /* get largest number of rows any processor has */
1338     rlen = mat->rmap->n;
1339     range = mat->rmap->range;
1340     for (i=1; i<size; i++) {
1341       rlen = PetscMax(rlen,range[i+1] - range[i]);
1342     }
1343   } else {
1344     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1345     rlen = mat->rmap->n;
1346   }
1347 
1348   /* load up the local row counts */
1349   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1350   for (i=0; i<mat->rmap->n; i++) {
1351     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1352   }
1353 
1354   /* store the row lengths to the file */
1355   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1356   if (!rank) {
1357     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1358     for (i=1; i<size; i++) {
1359       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1360       rlen = range[i+1] - range[i];
1361       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1362       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1363     }
1364     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1365   } else {
1366     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1367     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1368     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1369   }
1370   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1371 
1372   /* load up the local column indices */
1373   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
1374   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1375   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1376   cnt  = 0;
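       /* Pack each local row's column indices in ascending global order: first the off-diagonal
          (B) columns that lie to the left of the diagonal block, then the diagonal-block (A)
          columns shifted by cstart, then the remaining off-diagonal (B) columns */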
1377   for (i=0; i<mat->rmap->n; i++) {
1378     for (j=B->i[i]; j<B->i[i+1]; j++) {
1379       if ( (col = garray[B->j[j]]) > cstart) break;
1380       column_indices[cnt++] = col;
1381     }
1382     for (k=A->i[i]; k<A->i[i+1]; k++) {
1383       column_indices[cnt++] = A->j[k] + cstart;
1384     }
1385     for (; j<B->i[i+1]; j++) {
1386       column_indices[cnt++] = garray[B->j[j]];
1387     }
1388   }
1389   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1390 
1391   /* store the column indices to the file */
1392   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1393   if (!rank) {
1394     MPI_Status status;
1395     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1396     for (i=1; i<size; i++) {
1397       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1398       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1399       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1400       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1401       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1402     }
1403     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1404   } else {
1405     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1406     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1407     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1408     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1409   }
1410   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1411 
1412   /* load up the local column values */
1413   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1414   cnt  = 0;
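       /* Pack the numerical values in exactly the same column order used for column_indices above */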
1415   for (i=0; i<mat->rmap->n; i++) {
1416     for (j=B->i[i]; j<B->i[i+1]; j++) {
1417       if ( garray[B->j[j]] > cstart) break;
1418       column_values[cnt++] = B->a[j];
1419     }
1420     for (k=A->i[i]; k<A->i[i+1]; k++) {
1421       column_values[cnt++] = A->a[k];
1422     }
1423     for (; j<B->i[i+1]; j++) {
1424       column_values[cnt++] = B->a[j];
1425     }
1426   }
1427   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1428 
1429   /* store the column values to the file */
1430   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1431   if (!rank) {
1432     MPI_Status status;
1433     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1434     for (i=1; i<size; i++) {
1435       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1436       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1437       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1438       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1439       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1440     }
1441     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1442   } else {
1443     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1444     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1445     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1446     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1447   }
1448   ierr = PetscFree(column_values);CHKERRQ(ierr);
1449 
1450   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1451   if (file) {
1452     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1453   }
1454 
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1460 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1461 {
1462   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1463   PetscErrorCode    ierr;
1464   PetscMPIInt       rank = aij->rank,size = aij->size;
1465   PetscBool         isdraw,iascii,isbinary;
1466   PetscViewer       sviewer;
1467   PetscViewerFormat format;
1468 
1469   PetscFunctionBegin;
1470   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1471   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1472   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1473   if (iascii) {
1474     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1475     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1476       MatInfo    info;
1477       PetscBool  inodes;
1478 
1479       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1480       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1481       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1482       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1483       if (!inodes) {
1484         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1485                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1486       } else {
1487         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1488                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1489       }
1490       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1491       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1492       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1493       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1494       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1495       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1496       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1497       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1498       PetscFunctionReturn(0);
1499     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1500       PetscInt   inodecount,inodelimit,*inodes;
1501       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1502       if (inodes) {
1503         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1504       } else {
1505         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1506       }
1507       PetscFunctionReturn(0);
1508     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1509       PetscFunctionReturn(0);
1510     }
1511   } else if (isbinary) {
1512     if (size == 1) {
1513       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1514       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1515     } else {
1516       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1517     }
1518     PetscFunctionReturn(0);
1519   } else if (isdraw) {
1520     PetscDraw  draw;
1521     PetscBool  isnull;
1522     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1523     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1524   }
1525 
1526   if (size == 1) {
1527     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1528     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1529   } else {
1530     /* assemble the entire matrix onto first processor. */
1531     Mat         A;
1532     Mat_SeqAIJ  *Aloc;
1533     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1534     MatScalar   *a;
1535 
1536     if (mat->rmap->N > 1024) {
1537       PetscBool  flg = PETSC_FALSE;
1538 
1539       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1540       if (!flg) {
1541         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1542       }
1543     }
1544 
1545     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1546     if (!rank) {
1547       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1548     } else {
1549       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1550     }
1551     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1552     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1553     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1554     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1555     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1556 
1557     /* copy over the A part */
1558     Aloc = (Mat_SeqAIJ*)aij->A->data;
1559     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1560     row = mat->rmap->rstart;
1561     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1562     for (i=0; i<m; i++) {
1563       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1564       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1565     }
1566     aj = Aloc->j;
1567     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1568 
1569     /* copy over the B part */
1570     Aloc = (Mat_SeqAIJ*)aij->B->data;
1571     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1572     row  = mat->rmap->rstart;
1573     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1574     ct   = cols;
1575     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1576     for (i=0; i<m; i++) {
1577       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1578       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1579     }
1580     ierr = PetscFree(ct);CHKERRQ(ierr);
1581     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583     /*
1584        Everyone has to call to draw the matrix since the graphics waits are
1585        synchronized across all processors that share the PetscDraw object
1586     */
1587     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1588     if (!rank) {
1589       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1590       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1591       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1592       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1593     }
1594     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1595     ierr = MatDestroy(&A);CHKERRQ(ierr);
1596   }
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatView_MPIAIJ"
1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1603 {
1604   PetscErrorCode ierr;
1605   PetscBool      iascii,isdraw,issocket,isbinary;
1606 
1607   PetscFunctionBegin;
1608   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1609   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1610   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1611   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1612   if (iascii || isdraw || isbinary || issocket) {
1613     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1614   } else {
1615     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1616   }
1617   PetscFunctionReturn(0);
1618 }
1619 
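     /*
        Note: this routine is reached through the public MatSOR() interface. For example, a single
        local symmetric sweep with zero initial guess would be requested as (illustrative call,
        not part of this file):

          ierr = MatSOR(mat,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

        Only the local diagonal blocks are relaxed; coupling to other processes enters through the
        off-diagonal block applied to the scattered ghost values of the current iterate.
     */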
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatSOR_MPIAIJ"
1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1623 {
1624   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1625   PetscErrorCode ierr;
1626   Vec            bb1 = 0;
1627   PetscBool      hasop;
1628 
1629   PetscFunctionBegin;
1630   if (flag == SOR_APPLY_UPPER) {
1631     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1632     PetscFunctionReturn(0);
1633   }
1634 
1635   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1636     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1637   }
1638 
1639   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1640     if (flag & SOR_ZERO_INITIAL_GUESS) {
1641       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1642       its--;
1643     }
1644 
1645     while (its--) {
1646       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1647       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1648 
1649       /* update rhs: bb1 = bb - B*x */
1650       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1651       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1652 
1653       /* local sweep */
1654       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1655     }
1656   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1657     if (flag & SOR_ZERO_INITIAL_GUESS) {
1658       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1659       its--;
1660     }
1661     while (its--) {
1662       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664 
1665       /* update rhs: bb1 = bb - B*x */
1666       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1667       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1668 
1669       /* local sweep */
1670       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1671     }
1672   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1673     if (flag & SOR_ZERO_INITIAL_GUESS) {
1674       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1675       its--;
1676     }
1677     while (its--) {
1678       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1679       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1680 
1681       /* update rhs: bb1 = bb - B*x */
1682       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1683       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1684 
1685       /* local sweep */
1686       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1687     }
1688   }  else if (flag & SOR_EISENSTAT) {
1689     Vec         xx1;
1690 
1691     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
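         /* Eisenstat's trick applied with the local diagonal blocks:
              1) a backward sweep on the diagonal block produces xx;
              2) bb1 = bb + ((omega-2)/omega)*D*xx + B*xhat is formed, where xhat are the scattered
                 ghost values of xx and D is the matrix diagonal;
              3) a forward sweep on bb1 produces xx1, and the result is xx + xx1 */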
1692     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1693 
1694     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1695     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1696     if (!mat->diag) {
1697       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1698       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1699     }
1700     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1701     if (hasop) {
1702       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1703     } else {
1704       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1705     }
1706     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1707 
1708     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1709 
1710     /* local sweep */
1711     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1712     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1713     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1714   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1715 
1716   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatPermute_MPIAIJ"
1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1723 {
1724   MPI_Comm       comm;
1725   PetscInt       first,local_rowsize,local_colsize;
1726   const PetscInt *rows;
1727   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1728   PetscErrorCode ierr;
1729 
1730   PetscFunctionBegin;
1731   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1732   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1733   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1734   /* collect the global row permutation and invert it */
1735   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1736   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1737   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1738   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1739   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1740   /* get the local target indices */
1741   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1742   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1743   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1744   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1745   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1746   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1747   /* the column permutation is so much easier;
1748      make a local version of 'colp' and invert it */
1749   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1750   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1751   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1752   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1753   /* now we just get the submatrix */
1754   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1755   /* clean up */
1756   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1757   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1763 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1764 {
1765   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1766   Mat            A = mat->A,B = mat->B;
1767   PetscErrorCode ierr;
1768   PetscReal      isend[5],irecv[5];
1769 
1770   PetscFunctionBegin;
1771   info->block_size     = 1.0;
1772   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1773   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1774   isend[3] = info->memory;  isend[4] = info->mallocs;
1775   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1776   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1777   isend[3] += info->memory;  isend[4] += info->mallocs;
1778   if (flag == MAT_LOCAL) {
1779     info->nz_used      = isend[0];
1780     info->nz_allocated = isend[1];
1781     info->nz_unneeded  = isend[2];
1782     info->memory       = isend[3];
1783     info->mallocs      = isend[4];
1784   } else if (flag == MAT_GLOBAL_MAX) {
1785     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1786     info->nz_used      = irecv[0];
1787     info->nz_allocated = irecv[1];
1788     info->nz_unneeded  = irecv[2];
1789     info->memory       = irecv[3];
1790     info->mallocs      = irecv[4];
1791   } else if (flag == MAT_GLOBAL_SUM) {
1792     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1793     info->nz_used      = irecv[0];
1794     info->nz_allocated = irecv[1];
1795     info->nz_unneeded  = irecv[2];
1796     info->memory       = irecv[3];
1797     info->mallocs      = irecv[4];
1798   }
1799   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1800   info->fill_ratio_needed = 0;
1801   info->factor_mallocs    = 0;
1802   PetscFunctionReturn(0);
1803 }
1804 
1805 #undef __FUNCT__
1806 #define __FUNCT__ "MatSetOption_MPIAIJ"
1807 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1808 {
1809   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1810   PetscErrorCode ierr;
1811 
1812   PetscFunctionBegin;
1813   switch (op) {
1814   case MAT_NEW_NONZERO_LOCATIONS:
1815   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1816   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1817   case MAT_KEEP_NONZERO_PATTERN:
1818   case MAT_NEW_NONZERO_LOCATION_ERR:
1819   case MAT_USE_INODES:
1820   case MAT_IGNORE_ZERO_ENTRIES:
1821     MatCheckPreallocated(A,1);
1822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1823     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1824     break;
1825   case MAT_ROW_ORIENTED:
1826     a->roworiented = flg;
1827     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1828     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1829     break;
1830   case MAT_NEW_DIAGONALS:
1831     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1832     break;
1833   case MAT_IGNORE_OFF_PROC_ENTRIES:
1834     a->donotstash = flg;
1835     break;
1836   case MAT_SPD:
1837     A->spd_set                         = PETSC_TRUE;
1838     A->spd                             = flg;
1839     if (flg) {
1840       A->symmetric                     = PETSC_TRUE;
1841       A->structurally_symmetric        = PETSC_TRUE;
1842       A->symmetric_set                 = PETSC_TRUE;
1843       A->structurally_symmetric_set    = PETSC_TRUE;
1844     }
1845     break;
1846   case MAT_SYMMETRIC:
1847     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1848     break;
1849   case MAT_STRUCTURALLY_SYMMETRIC:
1850     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1851     break;
1852   case MAT_HERMITIAN:
1853     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1854     break;
1855   case MAT_SYMMETRY_ETERNAL:
1856     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1857     break;
1858   default:
1859     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1860   }
1861   PetscFunctionReturn(0);
1862 }
1863 
1864 #undef __FUNCT__
1865 #define __FUNCT__ "MatGetRow_MPIAIJ"
1866 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1867 {
1868   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1869   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1870   PetscErrorCode ierr;
1871   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1872   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1873   PetscInt       *cmap,*idx_p;
1874 
1875   PetscFunctionBegin;
1876   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1877   mat->getrowactive = PETSC_TRUE;
1878 
1879   if (!mat->rowvalues && (idx || v)) {
1880     /*
1881         allocate enough space to hold information from the longest row.
1882     */
1883     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1884     PetscInt   max = 1,tmp;
1885     for (i=0; i<matin->rmap->n; i++) {
1886       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1887       if (max < tmp) { max = tmp; }
1888     }
1889     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1890   }
1891 
1892   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1893   lrow = row - rstart;
1894 
1895   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1896   if (!v)   {pvA = 0; pvB = 0;}
1897   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1898   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1899   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1900   nztot = nzA + nzB;
1901 
1902   cmap  = mat->garray;
1903   if (v  || idx) {
1904     if (nztot) {
1905       /* Sort by increasing column numbers, assuming A and B already sorted */
1906       PetscInt imark = -1;
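                 /* imark counts how many off-diagonal (B) columns lie to the left of the diagonal block */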
1907       if (v) {
1908         *v = v_p = mat->rowvalues;
1909         for (i=0; i<nzB; i++) {
1910           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1911           else break;
1912         }
1913         imark = i;
1914         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1915         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1916       }
1917       if (idx) {
1918         *idx = idx_p = mat->rowindices;
1919         if (imark > -1) {
1920           for (i=0; i<imark; i++) {
1921             idx_p[i] = cmap[cworkB[i]];
1922           }
1923         } else {
1924           for (i=0; i<nzB; i++) {
1925             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1926             else break;
1927           }
1928           imark = i;
1929         }
1930         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1931         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1932       }
1933     } else {
1934       if (idx) *idx = 0;
1935       if (v)   *v   = 0;
1936     }
1937   }
1938   *nz = nztot;
1939   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1940   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1941   PetscFunctionReturn(0);
1942 }
1943 
1944 #undef __FUNCT__
1945 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1946 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1947 {
1948   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1949 
1950   PetscFunctionBegin;
1951   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1952   aij->getrowactive = PETSC_FALSE;
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 #undef __FUNCT__
1957 #define __FUNCT__ "MatNorm_MPIAIJ"
1958 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1959 {
1960   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1961   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1962   PetscErrorCode ierr;
1963   PetscInt       i,j,cstart = mat->cmap->rstart;
1964   PetscReal      sum = 0.0;
1965   MatScalar      *v;
1966 
1967   PetscFunctionBegin;
1968   if (aij->size == 1) {
1969     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1970   } else {
1971     if (type == NORM_FROBENIUS) {
1972       v = amat->a;
1973       for (i=0; i<amat->nz; i++) {
1974 #if defined(PETSC_USE_COMPLEX)
1975         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1976 #else
1977         sum += (*v)*(*v); v++;
1978 #endif
1979       }
1980       v = bmat->a;
1981       for (i=0; i<bmat->nz; i++) {
1982 #if defined(PETSC_USE_COMPLEX)
1983         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1984 #else
1985         sum += (*v)*(*v); v++;
1986 #endif
1987       }
1988       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1989       *norm = PetscSqrtReal(*norm);
1990     } else if (type == NORM_1) { /* max column norm */
1991       PetscReal *tmp,*tmp2;
1992       PetscInt  *jj,*garray = aij->garray;
1993       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1994       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1995       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1996       *norm = 0.0;
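           /* accumulate |a_ij| per global column: diagonal-block columns are offset by cstart,
              off-diagonal-block columns are mapped to global indices through garray */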
1997       v = amat->a; jj = amat->j;
1998       for (j=0; j<amat->nz; j++) {
1999         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
2000       }
2001       v = bmat->a; jj = bmat->j;
2002       for (j=0; j<bmat->nz; j++) {
2003         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2004       }
2005       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2006       for (j=0; j<mat->cmap->N; j++) {
2007         if (tmp2[j] > *norm) *norm = tmp2[j];
2008       }
2009       ierr = PetscFree(tmp);CHKERRQ(ierr);
2010       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2011     } else if (type == NORM_INFINITY) { /* max row norm */
2012       PetscReal ntemp = 0.0;
2013       for (j=0; j<aij->A->rmap->n; j++) {
2014         v = amat->a + amat->i[j];
2015         sum = 0.0;
2016         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2017           sum += PetscAbsScalar(*v); v++;
2018         }
2019         v = bmat->a + bmat->i[j];
2020         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2021           sum += PetscAbsScalar(*v); v++;
2022         }
2023         if (sum > ntemp) ntemp = sum;
2024       }
2025       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2026     } else {
2027       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2028     }
2029   }
2030   PetscFunctionReturn(0);
2031 }
2032 
2033 #undef __FUNCT__
2034 #define __FUNCT__ "MatTranspose_MPIAIJ"
2035 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2036 {
2037   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2038   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2039   PetscErrorCode ierr;
2040   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2041   PetscInt       cstart=A->cmap->rstart,ncol;
2042   Mat            B;
2043   MatScalar      *array;
2044 
2045   PetscFunctionBegin;
2046   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2047 
2048   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2049   ai = Aloc->i; aj = Aloc->j;
2050   bi = Bloc->i; bj = Bloc->j;
2051   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2052     PetscInt *d_nnz,*g_nnz,*o_nnz;
2053     PetscSFNode *oloc;
2054     PetscSF sf;
2055 
2056     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2057     /* compute d_nnz for preallocation */
2058     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2059     for (i=0; i<ai[ma]; i++) {
2060       d_nnz[aj[i]] ++;
2061       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2062     }
2063     /* compute local off-diagonal contributions */
2064     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2065     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2066     /* map those to global */
2067     for (i=0; i<nb; i++) {
2068       PetscInt gcol = a->garray[i],colowner;
2069       ierr = PetscLayoutFindOwner(A->cmap,a->garray[i],&colowner);CHKERRQ(ierr);
2070       oloc[i].rank  = colowner;
2071       oloc[i].index = gcol - A->cmap->range[colowner];
2072     }
2073     ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
2074     ierr = PetscSFSetGraph(sf,na,nb,PETSC_NULL,PETSC_USE_POINTER,oloc,PETSC_USE_POINTER);CHKERRQ(ierr);
2075     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
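         /* sum the ghost-column counts onto the owning processes: after the reduction, o_nnz[j] is
            the number of entries that other processes' off-diagonal blocks contribute to column
            cstart+j, i.e. the off-diagonal preallocation for row cstart+j of the transpose */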
2076     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2077     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2078     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2079 
2080     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2081     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2082     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2083     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2084     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2085     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2086   } else {
2087     B = *matout;
2088     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2089     for (i=0; i<ai[ma]; i++){
2090       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2091     }
2092   }
2093 
2094   /* copy over the A part */
2095   array = Aloc->a;
2096   row = A->rmap->rstart;
2097   for (i=0; i<ma; i++) {
2098     ncol = ai[i+1]-ai[i];
2099     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2100     row++; array += ncol; aj += ncol;
2101   }
2102   aj = Aloc->j;
2103   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2104 
2105   /* copy over the B part */
2106   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2107   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2108   array = Bloc->a;
2109   row = A->rmap->rstart;
2110   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2111   cols_tmp = cols;
2112   for (i=0; i<mb; i++) {
2113     ncol = bi[i+1]-bi[i];
2114     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2115     row++; array += ncol; cols_tmp += ncol;
2116   }
2117   ierr = PetscFree(cols);CHKERRQ(ierr);
2118 
2119   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2120   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2121   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2122     *matout = B;
2123   } else {
2124     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2125   }
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 #undef __FUNCT__
2130 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2131 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2132 {
2133   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2134   Mat            a = aij->A,b = aij->B;
2135   PetscErrorCode ierr;
2136   PetscInt       s1,s2,s3;
2137 
2138   PetscFunctionBegin;
2139   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
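       /* s2 = local number of rows (must match ll), s3 = local number of columns (must match rr) */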
2140   if (rr) {
2141     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2142     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2143     /* Overlap communication with computation. */
2144     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2145   }
2146   if (ll) {
2147     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2148     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2149     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2150   }
2151   /* scale  the diagonal block */
2152   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2153 
2154   if (rr) {
2155     /* Do a scatter end and then right scale the off-diagonal block */
2156     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2157     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2158   }
2159 
2160   PetscFunctionReturn(0);
2161 }
2162 
2163 #undef __FUNCT__
2164 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2165 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2166 {
2167   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2168   PetscErrorCode ierr;
2169 
2170   PetscFunctionBegin;
2171   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 #undef __FUNCT__
2176 #define __FUNCT__ "MatEqual_MPIAIJ"
2177 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2178 {
2179   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2180   Mat            a,b,c,d;
2181   PetscBool      flg;
2182   PetscErrorCode ierr;
2183 
2184   PetscFunctionBegin;
2185   a = matA->A; b = matA->B;
2186   c = matB->A; d = matB->B;
2187 
2188   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2189   if (flg) {
2190     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2191   }
2192   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 #undef __FUNCT__
2197 #define __FUNCT__ "MatCopy_MPIAIJ"
2198 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2199 {
2200   PetscErrorCode ierr;
2201   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2202   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2203 
2204   PetscFunctionBegin;
2205   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2206   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2207     /* Because of the column compression in the off-process part of the matrix a->B,
2208        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2209        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2210        could be provided by first uncompressing the a->B matrices and then copying the
2211        submatrices */
2212     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2213   } else {
2214     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2215     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2216   }
2217   PetscFunctionReturn(0);
2218 }
2219 
2220 #undef __FUNCT__
2221 #define __FUNCT__ "MatSetUp_MPIAIJ"
2222 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2223 {
2224   PetscErrorCode ierr;
2225 
2226   PetscFunctionBegin;
2227   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 #undef __FUNCT__
2232 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2233 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2234 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2235 {
2236   PetscInt          i,m=Y->rmap->N;
2237   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2238   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2239   const PetscInt    *xi = x->i,*yi = y->i;
2240 
2241   PetscFunctionBegin;
2242   /* Set the number of nonzeros in the new matrix */
2243   for (i=0; i<m; i++) {
2244     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2245     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2246     nnz[i] = 0;
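         /* two-pointer merge of row i's sorted global column lists from X and Y;
            a column present in both matrices is counted only once */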
2247     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2248       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2249       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2250       nnz[i]++;
2251     }
2252     for (; k<nzy; k++) nnz[i]++;
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatAXPY_MPIAIJ"
2259 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2260 {
2261   PetscErrorCode ierr;
2262   PetscInt       i;
2263   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2264   PetscBLASInt   bnz,one=1;
2265   Mat_SeqAIJ     *x,*y;
2266 
2267   PetscFunctionBegin;
2268   if (str == SAME_NONZERO_PATTERN) {
2269     PetscScalar alpha = a;
2270     x = (Mat_SeqAIJ *)xx->A->data;
2271     y = (Mat_SeqAIJ *)yy->A->data;
2272     bnz = PetscBLASIntCast(x->nz);
2273     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2274     x = (Mat_SeqAIJ *)xx->B->data;
2275     y = (Mat_SeqAIJ *)yy->B->data;
2276     bnz = PetscBLASIntCast(x->nz);
2277     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2278   } else if (str == SUBSET_NONZERO_PATTERN) {
2279     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2280 
2281     x = (Mat_SeqAIJ *)xx->B->data;
2282     y = (Mat_SeqAIJ *)yy->B->data;
2283     if (y->xtoy && y->XtoY != xx->B) {
2284       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2285       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2286     }
2287     if (!y->xtoy) { /* get xtoy */
2288       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2289       y->XtoY = xx->B;
2290       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2291     }
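         /* y->xtoy[i] is the position in y->a of the entry whose global column matches the i-th
            off-diagonal entry of x (matching is done on global columns via the two garrays) */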
2292     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2293   } else {
2294     Mat B;
2295     PetscInt *nnz_d,*nnz_o;
2296     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2297     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2298     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2299     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2300     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2301     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2302     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2303     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2304     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2305     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2306     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2307     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2308     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2309     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2310   }
2311   PetscFunctionReturn(0);
2312 }
2313 
2314 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2315 
2316 #undef __FUNCT__
2317 #define __FUNCT__ "MatConjugate_MPIAIJ"
2318 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2319 {
2320 #if defined(PETSC_USE_COMPLEX)
2321   PetscErrorCode ierr;
2322   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2323 
2324   PetscFunctionBegin;
2325   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2326   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2327 #else
2328   PetscFunctionBegin;
2329 #endif
2330   PetscFunctionReturn(0);
2331 }
2332 
2333 #undef __FUNCT__
2334 #define __FUNCT__ "MatRealPart_MPIAIJ"
2335 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2336 {
2337   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2338   PetscErrorCode ierr;
2339 
2340   PetscFunctionBegin;
2341   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2342   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2348 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2349 {
2350   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2351   PetscErrorCode ierr;
2352 
2353   PetscFunctionBegin;
2354   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2355   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2356   PetscFunctionReturn(0);
2357 }
2358 
2359 #if defined(PETSC_HAVE_PBGL)
2360 
2361 #include <boost/parallel/mpi/bsp_process_group.hpp>
2362 #include <boost/graph/distributed/ilu_default_graph.hpp>
2363 #include <boost/graph/distributed/ilu_0_block.hpp>
2364 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2365 #include <boost/graph/distributed/petsc/interface.hpp>
2366 #include <boost/multi_array.hpp>
2367 #include <boost/parallel/distributed_property_map.hpp>
2368 
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2371 /*
2372   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2373 */
2374 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2375 {
2376   namespace petsc = boost::distributed::petsc;
2377 
2378   namespace graph_dist = boost::graph::distributed;
2379   using boost::graph::distributed::ilu_default::process_group_type;
2380   using boost::graph::ilu_permuted;
2381 
2382   PetscBool       row_identity, col_identity;
2383   PetscContainer  c;
2384   PetscInt        m, n, M, N;
2385   PetscErrorCode  ierr;
2386 
2387   PetscFunctionBegin;
2388   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2389   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2390   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2391   if (!row_identity || !col_identity) {
2392     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2393   }
2394 
2395   process_group_type pg;
2396   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2397   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2398   lgraph_type&   level_graph = *lgraph_p;
2399   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2400 
2401   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2402   ilu_permuted(level_graph);
2403 
2404   /* put together the new matrix */
2405   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2406   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2407   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2408   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2409   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2410   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2411   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2412   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2413 
2414   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2415   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2416   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2417   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2423 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2424 {
2425   PetscFunctionBegin;
2426   PetscFunctionReturn(0);
2427 }
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatSolve_MPIAIJ"
2431 /*
2432   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2433 */
2434 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2435 {
2436   namespace graph_dist = boost::graph::distributed;
2437 
2438   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2439   lgraph_type*   lgraph_p;
2440   PetscContainer c;
2441   PetscErrorCode ierr;
2442 
2443   PetscFunctionBegin;
2444   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2445   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2446   ierr = VecCopy(b, x);CHKERRQ(ierr);
2447 
2448   PetscScalar* array_x;
2449   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2450   PetscInt sx;
2451   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2452 
2453   PetscScalar* array_b;
2454   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2455   PetscInt sb;
2456   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2457 
2458   lgraph_type&   level_graph = *lgraph_p;
2459   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2460 
2461   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2462   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2463                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2464 
2465   typedef boost::iterator_property_map<array_ref_type::iterator,
2466                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2467   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2468                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2469 
2470   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2471 
2472   PetscFunctionReturn(0);
2473 }
2474 #endif
2475 
2476 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2477   PetscInt       nzlocal,nsends,nrecvs;
2478   PetscMPIInt    *send_rank,*recv_rank;
2479   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2480   PetscScalar    *sbuf_a,**rbuf_a;
2481   PetscErrorCode (*Destroy)(Mat);
2482 } Mat_Redundant;
2483 
2484 #undef __FUNCT__
2485 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2486 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2487 {
2488   PetscErrorCode       ierr;
2489   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2490   PetscInt             i;
2491 
2492   PetscFunctionBegin;
2493   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2494   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2495   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2496   for (i=0; i<redund->nrecvs; i++){
2497     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2498     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2499   }
2500   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2501   ierr = PetscFree(redund);CHKERRQ(ierr);
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 #undef __FUNCT__
2506 #define __FUNCT__ "MatDestroy_MatRedundant"
2507 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2508 {
2509   PetscErrorCode  ierr;
2510   PetscContainer  container;
2511   Mat_Redundant   *redund=PETSC_NULL;
2512 
2513   PetscFunctionBegin;
2514   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2515   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2516   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2517   A->ops->destroy = redund->Destroy;
2518   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2519   if (A->ops->destroy) {
2520     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2521   }
2522   PetscFunctionReturn(0);
2523 }
2524 
2525 #undef __FUNCT__
2526 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2527 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2528 {
2529   PetscMPIInt    rank,size;
2530   MPI_Comm       comm=((PetscObject)mat)->comm;
2531   PetscErrorCode ierr;
2532   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2533   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2534   PetscInt       *rowrange=mat->rmap->range;
2535   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2536   Mat            A=aij->A,B=aij->B,C=*matredundant;
2537   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2538   PetscScalar    *sbuf_a;
2539   PetscInt       nzlocal=a->nz+b->nz;
2540   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2541   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2542   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2543   MatScalar      *aworkA,*aworkB;
2544   PetscScalar    *vals;
2545   PetscMPIInt    tag1,tag2,tag3,imdex;
2546   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2547                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2548   MPI_Status     recv_status,*send_status;
2549   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2550   PetscInt       **rbuf_j=PETSC_NULL;
2551   PetscScalar    **rbuf_a=PETSC_NULL;
2552   Mat_Redundant  *redund=PETSC_NULL;
2553   PetscContainer container;
2554 
2555   PetscFunctionBegin;
2556   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2557   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2558 
2559   if (reuse == MAT_REUSE_MATRIX) {
2560     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2561     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2562     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2563     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2564     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2565     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2566     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2567     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2568 
2569     nsends    = redund->nsends;
2570     nrecvs    = redund->nrecvs;
2571     send_rank = redund->send_rank;
2572     recv_rank = redund->recv_rank;
2573     sbuf_nz   = redund->sbuf_nz;
2574     rbuf_nz   = redund->rbuf_nz;
2575     sbuf_j    = redund->sbuf_j;
2576     sbuf_a    = redund->sbuf_a;
2577     rbuf_j    = redund->rbuf_j;
2578     rbuf_a    = redund->rbuf_a;
2579   }
2580 
2581   if (reuse == MAT_INITIAL_MATRIX){
2582     PetscMPIInt  subrank,subsize;
2583     PetscInt     nleftover,np_subcomm;
2584     /* get the destination processors' id send_rank, nsends and nrecvs */
2585     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2586     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2587     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2588     np_subcomm = size/nsubcomm;
2589     nleftover  = size - nsubcomm*np_subcomm;
2590     nsends = 0; nrecvs = 0;
2591     for (i=0; i<size; i++){ /* i = rank */
2592       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2593         send_rank[nsends] = i; nsends++;
2594         recv_rank[nrecvs++] = i;
2595       }
2596     }
2597     if (rank >= size - nleftover){/* this proc is a leftover processor */
2598       i = size-nleftover-1;
2599       j = 0;
2600       while (j < nsubcomm - nleftover){
2601         send_rank[nsends++] = i;
2602         i--; j++;
2603       }
2604     }
2605 
2606     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2607       for (i=0; i<nleftover; i++){
2608         recv_rank[nrecvs++] = size-nleftover+i;
2609       }
2610     }
2611 
2612     /* allocate sbuf_j, sbuf_a */
2613     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2614     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2615     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2616   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2617 
2618   /* copy mat's local entries into the buffers */
2619   if (reuse == MAT_INITIAL_MATRIX){
2620     rownz_max = 0;
2621     rptr = sbuf_j;
2622     cols = sbuf_j + rend-rstart + 1;
2623     vals = sbuf_a;
2624     rptr[0] = 0;
2625     for (i=0; i<rend-rstart; i++){
2626       row = i + rstart;
2627       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2628       ncols  = nzA + nzB;
2629       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2630       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2631       /* load the column indices for this row into cols */
2632       lwrite = 0;
2633       for (l=0; l<nzB; l++) {
2634         if ((ctmp = bmap[cworkB[l]]) < cstart){
2635           vals[lwrite]   = aworkB[l];
2636           cols[lwrite++] = ctmp;
2637         }
2638       }
2639       for (l=0; l<nzA; l++){
2640         vals[lwrite]   = aworkA[l];
2641         cols[lwrite++] = cstart + cworkA[l];
2642       }
2643       for (l=0; l<nzB; l++) {
2644         if ((ctmp = bmap[cworkB[l]]) >= cend){
2645           vals[lwrite]   = aworkB[l];
2646           cols[lwrite++] = ctmp;
2647         }
2648       }
2649       vals += ncols;
2650       cols += ncols;
2651       rptr[i+1] = rptr[i] + ncols;
2652       if (rownz_max < ncols) rownz_max = ncols;
2653     }
2654     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2655   } else { /* only copy matrix values into sbuf_a */
2656     rptr = sbuf_j;
2657     vals = sbuf_a;
2658     rptr[0] = 0;
2659     for (i=0; i<rend-rstart; i++){
2660       row = i + rstart;
2661       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2662       ncols  = nzA + nzB;
2663       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2664       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2665       lwrite = 0;
2666       for (l=0; l<nzB; l++) {
2667         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2668       }
2669       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2670       for (l=0; l<nzB; l++) {
2671         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2672       }
2673       vals += ncols;
2674       rptr[i+1] = rptr[i] + ncols;
2675     }
2676   } /* end of if (reuse == MAT_INITIAL_MATRIX) ... else */
2677 
2678   /* send nzlocal to others, and recv other's nzlocal */
2679   /*--------------------------------------------------*/
2680   if (reuse == MAT_INITIAL_MATRIX){
2681     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2682     s_waits2 = s_waits3 + nsends;
2683     s_waits1 = s_waits2 + nsends;
2684     r_waits1 = s_waits1 + nsends;
2685     r_waits2 = r_waits1 + nrecvs;
2686     r_waits3 = r_waits2 + nrecvs;
2687   } else {
2688     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2689     r_waits3 = s_waits3 + nsends;
2690   }
2691 
2692   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2693   if (reuse == MAT_INITIAL_MATRIX){
2694     /* get new tags to keep the communication clean */
2695     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2696     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2697     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2698 
2699     /* post receives of other's nzlocal */
2700     for (i=0; i<nrecvs; i++){
2701       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2702     }
2703     /* send nzlocal to others */
2704     for (i=0; i<nsends; i++){
2705       sbuf_nz[i] = nzlocal;
2706       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2707     }
2708     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2709     count = nrecvs;
2710     while (count) {
2711       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2712       recv_rank[imdex] = recv_status.MPI_SOURCE;
2713       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2714       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2715 
2716       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2717       rbuf_nz[imdex] += i + 2;
2718       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2719       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2720       count--;
2721     }
2722     /* wait on sends of nzlocal */
2723     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2724     /* send mat->i,j to others, and recv from other's */
2725     /*------------------------------------------------*/
2726     for (i=0; i<nsends; i++){
2727       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2728       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2729     }
2730     /* wait on receives of mat->i,j */
2731     /*------------------------------*/
2732     count = nrecvs;
2733     while (count) {
2734       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2735       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2736       count--;
2737     }
2738     /* wait on sends of mat->i,j */
2739     /*---------------------------*/
2740     if (nsends) {
2741       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2742     }
2743   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2744 
2745   /* post receives, send and receive mat->a */
2746   /*----------------------------------------*/
2747   for (imdex=0; imdex<nrecvs; imdex++) {
2748     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2749   }
2750   for (i=0; i<nsends; i++){
2751     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2752   }
2753   count = nrecvs;
2754   while (count) {
2755     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2756     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2757     count--;
2758   }
2759   if (nsends) {
2760     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2761   }
2762 
2763   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2764 
2765   /* create redundant matrix */
2766   /*-------------------------*/
2767   if (reuse == MAT_INITIAL_MATRIX){
2768     /* compute rownz_max for preallocation */
2769     for (imdex=0; imdex<nrecvs; imdex++){
2770       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2771       rptr = rbuf_j[imdex];
2772       for (i=0; i<j; i++){
2773         ncols = rptr[i+1] - rptr[i];
2774         if (rownz_max < ncols) rownz_max = ncols;
2775       }
2776     }
2777 
2778     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2779     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2780     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2781     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2782     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2783     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2784   } else {
2785     C = *matredundant;
2786   }
2787 
2788   /* insert local matrix entries */
2789   rptr = sbuf_j;
2790   cols = sbuf_j + rend-rstart + 1;
2791   vals = sbuf_a;
2792   for (i=0; i<rend-rstart; i++){
2793     row   = i + rstart;
2794     ncols = rptr[i+1] - rptr[i];
2795     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2796     vals += ncols;
2797     cols += ncols;
2798   }
2799   /* insert received matrix entries */
2800   for (imdex=0; imdex<nrecvs; imdex++){
2801     rstart = rowrange[recv_rank[imdex]];
2802     rend   = rowrange[recv_rank[imdex]+1];
2803     rptr = rbuf_j[imdex];
2804     cols = rbuf_j[imdex] + rend-rstart + 1;
2805     vals = rbuf_a[imdex];
2806     for (i=0; i<rend-rstart; i++){
2807       row   = i + rstart;
2808       ncols = rptr[i+1] - rptr[i];
2809       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2810       vals += ncols;
2811       cols += ncols;
2812     }
2813   }
2814   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2815   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2816   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2817   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2818   if (reuse == MAT_INITIAL_MATRIX) {
2819     PetscContainer container;
2820     *matredundant = C;
2821     /* create a supporting struct and attach it to C for reuse */
2822     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2823     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2824     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2825     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2826     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2827     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2828 
2829     redund->nzlocal = nzlocal;
2830     redund->nsends  = nsends;
2831     redund->nrecvs  = nrecvs;
2832     redund->send_rank = send_rank;
2833     redund->recv_rank = recv_rank;
2834     redund->sbuf_nz = sbuf_nz;
2835     redund->rbuf_nz = rbuf_nz;
2836     redund->sbuf_j  = sbuf_j;
2837     redund->sbuf_a  = sbuf_a;
2838     redund->rbuf_j  = rbuf_j;
2839     redund->rbuf_a  = rbuf_a;
2840 
2841     redund->Destroy = C->ops->destroy;
2842     C->ops->destroy = MatDestroy_MatRedundant;
2843   }
2844   PetscFunctionReturn(0);
2845 }
2846 
2847 #undef __FUNCT__
2848 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2849 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2850 {
2851   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2852   PetscErrorCode ierr;
2853   PetscInt       i,*idxb = 0;
2854   PetscScalar    *va,*vb;
2855   Vec            vtmp;
2856 
2857   PetscFunctionBegin;
2858   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2859   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2860   if (idx) {
2861     for (i=0; i<A->rmap->n; i++) {
2862       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2863     }
2864   }
2865 
2866   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2867   if (idx) {
2868     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2869   }
2870   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2871   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2872 
2873   for (i=0; i<A->rmap->n; i++){
2874     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2875       va[i] = vb[i];
2876       if (idx) idx[i] = a->garray[idxb[i]];
2877     }
2878   }
2879 
2880   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2881   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2882   ierr = PetscFree(idxb);CHKERRQ(ierr);
2883   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2884   PetscFunctionReturn(0);
2885 }
2886 
2887 #undef __FUNCT__
2888 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2889 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2890 {
2891   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2892   PetscErrorCode ierr;
2893   PetscInt       i,*idxb = 0;
2894   PetscScalar    *va,*vb;
2895   Vec            vtmp;
2896 
2897   PetscFunctionBegin;
2898   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2899   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2900   if (idx) {
2901     for (i=0; i<A->rmap->n; i++) {
2902       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2903     }
2904   }
2905 
2906   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2907   if (idx) {
2908     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2909   }
2910   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2911   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2912 
2913   for (i=0; i<A->rmap->n; i++){
2914     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2915       va[i] = vb[i];
2916       if (idx) idx[i] = a->garray[idxb[i]];
2917     }
2918   }
2919 
2920   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2921   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2922   ierr = PetscFree(idxb);CHKERRQ(ierr);
2923   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2924   PetscFunctionReturn(0);
2925 }
2926 
2927 #undef __FUNCT__
2928 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2929 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2930 {
2931   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2932   PetscInt       n      = A->rmap->n;
2933   PetscInt       cstart = A->cmap->rstart;
2934   PetscInt      *cmap   = mat->garray;
2935   PetscInt      *diagIdx, *offdiagIdx;
2936   Vec            diagV, offdiagV;
2937   PetscScalar   *a, *diagA, *offdiagA;
2938   PetscInt       r;
2939   PetscErrorCode ierr;
2940 
2941   PetscFunctionBegin;
2942   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2943   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2944   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2945   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2946   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2947   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2948   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2949   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2950   for (r = 0; r < n; ++r) {
2951     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2952       a[r]   = diagA[r];
2953       idx[r] = cstart + diagIdx[r];
2954     } else {
2955       a[r]   = offdiagA[r];
2956       idx[r] = cmap[offdiagIdx[r]];
2957     }
2958   }
2959   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2960   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2961   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2962   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2963   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2964   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2965   PetscFunctionReturn(0);
2966 }
2967 
2968 #undef __FUNCT__
2969 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2970 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2971 {
2972   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2973   PetscInt       n      = A->rmap->n;
2974   PetscInt       cstart = A->cmap->rstart;
2975   PetscInt      *cmap   = mat->garray;
2976   PetscInt      *diagIdx, *offdiagIdx;
2977   Vec            diagV, offdiagV;
2978   PetscScalar   *a, *diagA, *offdiagA;
2979   PetscInt       r;
2980   PetscErrorCode ierr;
2981 
2982   PetscFunctionBegin;
2983   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2984   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2985   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2986   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2987   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2988   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2989   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2990   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2991   for (r = 0; r < n; ++r) {
2992     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2993       a[r]   = diagA[r];
2994       idx[r] = cstart + diagIdx[r];
2995     } else {
2996       a[r]   = offdiagA[r];
2997       idx[r] = cmap[offdiagIdx[r]];
2998     }
2999   }
3000   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3001   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3002   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3003   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3004   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3005   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3006   PetscFunctionReturn(0);
3007 }
3008 
3009 #undef __FUNCT__
3010 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3011 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3012 {
3013   PetscErrorCode ierr;
3014   Mat            *dummy;
3015 
3016   PetscFunctionBegin;
3017   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3018   *newmat = *dummy;
3019   ierr = PetscFree(dummy);CHKERRQ(ierr);
3020   PetscFunctionReturn(0);
3021 }
3022 
3023 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3024 
3025 #undef __FUNCT__
3026 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3027 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3028 {
3029   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3030   PetscErrorCode ierr;
3031 
3032   PetscFunctionBegin;
3033   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3034   PetscFunctionReturn(0);
3035 }
3036 
3037 #undef __FUNCT__
3038 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3039 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3040 {
3041   PetscErrorCode ierr;
3042   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3043 
3044   PetscFunctionBegin;
3045   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3046   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3047   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3048   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3049   PetscFunctionReturn(0);
3050 }
3051 
3052 /* -------------------------------------------------------------------*/
3053 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3054        MatGetRow_MPIAIJ,
3055        MatRestoreRow_MPIAIJ,
3056        MatMult_MPIAIJ,
3057 /* 4*/ MatMultAdd_MPIAIJ,
3058        MatMultTranspose_MPIAIJ,
3059        MatMultTransposeAdd_MPIAIJ,
3060 #ifdef PETSC_HAVE_PBGL
3061        MatSolve_MPIAIJ,
3062 #else
3063        0,
3064 #endif
3065        0,
3066        0,
3067 /*10*/ 0,
3068        0,
3069        0,
3070        MatSOR_MPIAIJ,
3071        MatTranspose_MPIAIJ,
3072 /*15*/ MatGetInfo_MPIAIJ,
3073        MatEqual_MPIAIJ,
3074        MatGetDiagonal_MPIAIJ,
3075        MatDiagonalScale_MPIAIJ,
3076        MatNorm_MPIAIJ,
3077 /*20*/ MatAssemblyBegin_MPIAIJ,
3078        MatAssemblyEnd_MPIAIJ,
3079        MatSetOption_MPIAIJ,
3080        MatZeroEntries_MPIAIJ,
3081 /*24*/ MatZeroRows_MPIAIJ,
3082        0,
3083 #ifdef PETSC_HAVE_PBGL
3084        0,
3085 #else
3086        0,
3087 #endif
3088        0,
3089        0,
3090 /*29*/ MatSetUp_MPIAIJ,
3091 #ifdef PETSC_HAVE_PBGL
3092        0,
3093 #else
3094        0,
3095 #endif
3096        0,
3097        0,
3098        0,
3099 /*34*/ MatDuplicate_MPIAIJ,
3100        0,
3101        0,
3102        0,
3103        0,
3104 /*39*/ MatAXPY_MPIAIJ,
3105        MatGetSubMatrices_MPIAIJ,
3106        MatIncreaseOverlap_MPIAIJ,
3107        MatGetValues_MPIAIJ,
3108        MatCopy_MPIAIJ,
3109 /*44*/ MatGetRowMax_MPIAIJ,
3110        MatScale_MPIAIJ,
3111        0,
3112        0,
3113        MatZeroRowsColumns_MPIAIJ,
3114 /*49*/ MatSetRandom_MPIAIJ,
3115        0,
3116        0,
3117        0,
3118        0,
3119 /*54*/ MatFDColoringCreate_MPIAIJ,
3120        0,
3121        MatSetUnfactored_MPIAIJ,
3122        0, /* MatPermute_MPIAIJ, impl currently broken */
3123        0,
3124 /*59*/ MatGetSubMatrix_MPIAIJ,
3125        MatDestroy_MPIAIJ,
3126        MatView_MPIAIJ,
3127        0,
3128        0,
3129 /*64*/ 0,
3130        0,
3131        0,
3132        0,
3133        0,
3134 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3135        MatGetRowMinAbs_MPIAIJ,
3136        0,
3137        MatSetColoring_MPIAIJ,
3138 #if defined(PETSC_HAVE_ADIC)
3139        MatSetValuesAdic_MPIAIJ,
3140 #else
3141        0,
3142 #endif
3143        MatSetValuesAdifor_MPIAIJ,
3144 /*75*/ MatFDColoringApply_AIJ,
3145        0,
3146        0,
3147        0,
3148        MatFindZeroDiagonals_MPIAIJ,
3149 /*80*/ 0,
3150        0,
3151        0,
3152 /*83*/ MatLoad_MPIAIJ,
3153        0,
3154        0,
3155        0,
3156        0,
3157        0,
3158 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3159        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3160        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3161        MatPtAP_Basic,
3162        MatPtAPSymbolic_MPIAIJ,
3163 /*94*/ MatPtAPNumeric_MPIAIJ,
3164        0,
3165        0,
3166        0,
3167        0,
3168 /*99*/ 0,
3169        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3170        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3171        MatConjugate_MPIAIJ,
3172        0,
3173 /*104*/MatSetValuesRow_MPIAIJ,
3174        MatRealPart_MPIAIJ,
3175        MatImaginaryPart_MPIAIJ,
3176        0,
3177        0,
3178 /*109*/0,
3179        MatGetRedundantMatrix_MPIAIJ,
3180        MatGetRowMin_MPIAIJ,
3181        0,
3182        0,
3183 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3184        0,
3185        0,
3186        0,
3187        0,
3188 /*119*/0,
3189        0,
3190        0,
3191        0,
3192        MatGetMultiProcBlock_MPIAIJ,
3193 /*124*/MatFindNonzeroRows_MPIAIJ,
3194        MatGetColumnNorms_MPIAIJ,
3195        MatInvertBlockDiagonal_MPIAIJ,
3196        0,
3197        MatGetSubMatricesParallel_MPIAIJ,
3198 /*129*/0,
3199        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3200        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3201        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3202        0,
3203 /*134*/0,
3204        0,
3205        0,
3206        0,
3207        0
3208 };
3209 
3210 /* ----------------------------------------------------------------------------------------*/
3211 
3212 EXTERN_C_BEGIN
3213 #undef __FUNCT__
3214 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3215 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3216 {
3217   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3218   PetscErrorCode ierr;
3219 
3220   PetscFunctionBegin;
3221   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3222   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3223   PetscFunctionReturn(0);
3224 }
3225 EXTERN_C_END
3226 
3227 EXTERN_C_BEGIN
3228 #undef __FUNCT__
3229 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3230 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3231 {
3232   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3233   PetscErrorCode ierr;
3234 
3235   PetscFunctionBegin;
3236   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3237   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3238   PetscFunctionReturn(0);
3239 }
3240 EXTERN_C_END
3241 
3242 EXTERN_C_BEGIN
3243 #undef __FUNCT__
3244 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3245 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3246 {
3247   Mat_MPIAIJ     *b;
3248   PetscErrorCode ierr;
3249   PetscInt       i;
3250   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3251 
3252   PetscFunctionBegin;
3253   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3254   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3255   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3256   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3257   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3258   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3259 
3260   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3261   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3262   if (d_nnz) {
3263     for (i=0; i<B->rmap->n; i++) {
3264       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3265     }
3266   }
3267   if (o_nnz) {
3268     for (i=0; i<B->rmap->n; i++) {
3269       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3270     }
3271   }
3272   b = (Mat_MPIAIJ*)B->data;
3273 
3274   if (!B->preallocated) {
3275     /* Explicitly create 2 MATSEQAIJ matrices. */
3276     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3277     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3278     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3279     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3280     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3281     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3282     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3283     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3284     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3285     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3286   }
3287 
3288   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3289   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3290   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3291   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3292   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3293   B->preallocated = PETSC_TRUE;
3294   PetscFunctionReturn(0);
3295 }
3296 EXTERN_C_END
3297 
3298 #undef __FUNCT__
3299 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3300 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3301 {
3302   Mat            mat;
3303   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3304   PetscErrorCode ierr;
3305 
3306   PetscFunctionBegin;
3307   *newmat       = 0;
3308   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3309   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3310   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3311   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3312   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3313   a    = (Mat_MPIAIJ*)mat->data;
3314 
3315   mat->factortype    = matin->factortype;
3316   mat->rmap->bs      = matin->rmap->bs;
3317   mat->cmap->bs      = matin->cmap->bs;
3318   mat->assembled    = PETSC_TRUE;
3319   mat->insertmode   = NOT_SET_VALUES;
3320   mat->preallocated = PETSC_TRUE;
3321 
3322   a->size           = oldmat->size;
3323   a->rank           = oldmat->rank;
3324   a->donotstash     = oldmat->donotstash;
3325   a->roworiented    = oldmat->roworiented;
3326   a->rowindices     = 0;
3327   a->rowvalues      = 0;
3328   a->getrowactive   = PETSC_FALSE;
3329 
3330   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3331   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3332 
3333   if (oldmat->colmap) {
3334 #if defined (PETSC_USE_CTABLE)
3335     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3336 #else
3337     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3338     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3339     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3340 #endif
3341   } else a->colmap = 0;
3342   if (oldmat->garray) {
3343     PetscInt len;
3344     len  = oldmat->B->cmap->n;
3345     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3346     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3347     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3348   } else a->garray = 0;
3349 
3350   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3351   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3352   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3353   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3354   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3355   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3356   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3357   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3358   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3359   *newmat = mat;
3360   PetscFunctionReturn(0);
3361 }
3362 
3363 
3364 
3365 #undef __FUNCT__
3366 #define __FUNCT__ "MatLoad_MPIAIJ"
3367 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3368 {
3369   PetscScalar    *vals,*svals;
3370   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3371   PetscErrorCode ierr;
3372   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3373   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3374   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3375   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3376   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3377   int            fd;
3378   PetscInt       bs = 1;
3379 
3380   PetscFunctionBegin;
3381   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3382   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3383   if (!rank) {
3384     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3385     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3386     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3387   }
3388 
3389   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3390   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3391   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3392 
3393   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3394 
3395   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3396   M = header[1]; N = header[2];
3397   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3398   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3399   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3400 
3401   /* If global sizes are set, check if they are consistent with that given in the file */
3402   if (sizesset) {
3403     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3404   }
3405   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3406   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3407 
3408   /* determine ownership of all (block) rows */
3409   if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3410   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3411   else m = newMat->rmap->n; /* Set by user */
3412 
3413   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3414   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3415 
3416   /* First process needs enough room for process with most rows */
3417   if (!rank) {
3418     mmax = rowners[1];
3419     for (i=2; i<=size; i++) {
3420       mmax = PetscMax(mmax, rowners[i]);
3421     }
3422   } else mmax = m;
3423 
3424   rowners[0] = 0;
3425   for (i=2; i<=size; i++) {
3426     rowners[i] += rowners[i-1];
3427   }
3428   rstart = rowners[rank];
3429   rend   = rowners[rank+1];
3430 
3431   /* distribute row lengths to all processors */
3432   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3433   if (!rank) {
3434     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3435     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3436     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3437     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3438     for (j=0; j<m; j++) {
3439       procsnz[0] += ourlens[j];
3440     }
3441     for (i=1; i<size; i++) {
3442       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3443       /* calculate the number of nonzeros on each processor */
3444       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3445         procsnz[i] += rowlengths[j];
3446       }
3447       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3448     }
3449     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3450   } else {
3451     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3452   }
3453 
3454   if (!rank) {
3455     /* determine max buffer needed and allocate it */
3456     maxnz = 0;
3457     for (i=0; i<size; i++) {
3458       maxnz = PetscMax(maxnz,procsnz[i]);
3459     }
3460     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3461 
3462     /* read in my part of the matrix column indices  */
3463     nz   = procsnz[0];
3464     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3465     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3466 
3467     /* read in everyone else's and ship off */
3468     for (i=1; i<size; i++) {
3469       nz     = procsnz[i];
3470       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3471       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3472     }
3473     ierr = PetscFree(cols);CHKERRQ(ierr);
3474   } else {
3475     /* determine buffer space needed for message */
3476     nz = 0;
3477     for (i=0; i<m; i++) {
3478       nz += ourlens[i];
3479     }
3480     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3481 
3482     /* receive message of column indices*/
3483     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3484   }
3485 
3486   /* determine column ownership if matrix is not square */
3487   if (N != M) {
3488     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3489     else n = newMat->cmap->n;
3490     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3491     cstart = cend - n;
3492   } else {
3493     cstart = rstart;
3494     cend   = rend;
3495     n      = cend - cstart;
3496   }
3497 
3498   /* loop over local rows, determining number of off diagonal entries */
3499   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3500   jj = 0;
3501   for (i=0; i<m; i++) {
3502     for (j=0; j<ourlens[i]; j++) {
3503       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3504       jj++;
3505     }
3506   }
3507 
3508   for (i=0; i<m; i++) {
3509     ourlens[i] -= offlens[i];
3510   }
3511   if (!sizesset) {
3512     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3513   }
3514 
3515   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3516 
3517   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3518 
3519   for (i=0; i<m; i++) {
3520     ourlens[i] += offlens[i];
3521   }
3522 
3523   if (!rank) {
3524     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3525 
3526     /* read in my part of the matrix numerical values  */
3527     nz   = procsnz[0];
3528     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3529 
3530     /* insert into matrix */
3531     jj      = rstart;
3532     smycols = mycols;
3533     svals   = vals;
3534     for (i=0; i<m; i++) {
3535       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3536       smycols += ourlens[i];
3537       svals   += ourlens[i];
3538       jj++;
3539     }
3540 
3541     /* read in other processors and ship out */
3542     for (i=1; i<size; i++) {
3543       nz     = procsnz[i];
3544       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3545       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3546     }
3547     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3548   } else {
3549     /* receive numeric values */
3550     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3551 
3552     /* receive message of values*/
3553     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3554 
3555     /* insert into matrix */
3556     jj      = rstart;
3557     smycols = mycols;
3558     svals   = vals;
3559     for (i=0; i<m; i++) {
3560       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3561       smycols += ourlens[i];
3562       svals   += ourlens[i];
3563       jj++;
3564     }
3565   }
3566   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3567   ierr = PetscFree(vals);CHKERRQ(ierr);
3568   ierr = PetscFree(mycols);CHKERRQ(ierr);
3569   ierr = PetscFree(rowners);CHKERRQ(ierr);
3570   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3571   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3572 
3573   PetscFunctionReturn(0);
3574 }
3575 
3576 #undef __FUNCT__
3577 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3578 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3579 {
3580   PetscErrorCode ierr;
3581   IS             iscol_local;
3582   PetscInt       csize;
3583 
3584   PetscFunctionBegin;
3585   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3586   if (call == MAT_REUSE_MATRIX) {
3587     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3588     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3589   } else {
3590     PetscInt cbs;
3591     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3592     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3593     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3594   }
3595   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3596   if (call == MAT_INITIAL_MATRIX) {
3597     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3598     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3599   }
3600   PetscFunctionReturn(0);
3601 }
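/*
   A minimal usage sketch (illustrative only; the parallel matrix A and the index sets
   isrow,iscol are assumed to already exist) of the reuse pattern handled above:

     Mat sub;
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ... change the numerical values of A, keeping the same nonzero pattern ...
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatDestroy(&sub);CHKERRQ(ierr);

   The first call composes the "ISAllGather" and "SubMatrix" objects on sub; the second
   call queries them and refills the existing matrix instead of building a new one.
*/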
3602 
3603 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3604 #undef __FUNCT__
3605 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3606 /*
3607     Not great since it makes two copies of the submatrix: first a SeqAIJ
3608   on each process, and then the end result by concatenating the local matrices.
3609   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3610 
3611   Note: This requires a sequential iscol with all indices.
3612 */
3613 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3614 {
3615   PetscErrorCode ierr;
3616   PetscMPIInt    rank,size;
3617   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3618   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3619   PetscBool      allcolumns, colflag;
3620   Mat            M,Mreuse;
3621   MatScalar      *vwork,*aa;
3622   MPI_Comm       comm = ((PetscObject)mat)->comm;
3623   Mat_SeqAIJ     *aij;
3624 
3625 
3626   PetscFunctionBegin;
3627   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3628   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3629 
3630   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3631   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3632   if (colflag && ncol == mat->cmap->N){
3633     allcolumns = PETSC_TRUE;
3634   } else {
3635     allcolumns = PETSC_FALSE;
3636   }
3637   if (call ==  MAT_REUSE_MATRIX) {
3638     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3639     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3640     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3641   } else {
3642     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3643   }
3644 
3645   /*
3646       m - number of local rows
3647       n - number of columns (same on all processors)
3648       rstart - first row in new global matrix generated
3649   */
3650   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3651   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3652   if (call == MAT_INITIAL_MATRIX) {
3653     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3654     ii  = aij->i;
3655     jj  = aij->j;
3656 
3657     /*
3658         Determine the number of non-zeros in the diagonal and off-diagonal
3659         portions of the matrix in order to do correct preallocation
3660     */
3661 
3662     /* first get start and end of "diagonal" columns */
3663     if (csize == PETSC_DECIDE) {
3664       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3665       if (mglobal == n) { /* square matrix */
3666 	nlocal = m;
3667       } else {
3668         nlocal = n/size + ((n % size) > rank);
3669       }
3670     } else {
3671       nlocal = csize;
3672     }
3673     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3674     rstart = rend - nlocal;
3675     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3676 
3677     /* next, compute all the lengths */
3678     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3679     olens = dlens + m;
3680     for (i=0; i<m; i++) {
3681       jend = ii[i+1] - ii[i];
3682       olen = 0;
3683       dlen = 0;
3684       for (j=0; j<jend; j++) {
3685         if (*jj < rstart || *jj >= rend) olen++;
3686         else dlen++;
3687         jj++;
3688       }
3689       olens[i] = olen;
3690       dlens[i] = dlen;
3691     }
3692     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3693     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3694     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3695     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3696     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3697     ierr = PetscFree(dlens);CHKERRQ(ierr);
3698   } else {
3699     PetscInt ml,nl;
3700 
3701     M = *newmat;
3702     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3703     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3704     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3705     /*
3706          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3707        rather than the slower MatSetValues().
3708     */
3709     M->was_assembled = PETSC_TRUE;
3710     M->assembled     = PETSC_FALSE;
3711   }
3712   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3713   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3714   ii  = aij->i;
3715   jj  = aij->j;
3716   aa  = aij->a;
3717   for (i=0; i<m; i++) {
3718     row   = rstart + i;
3719     nz    = ii[i+1] - ii[i];
3720     cwork = jj;     jj += nz;
3721     vwork = aa;     aa += nz;
3722     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3723   }
3724 
3725   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3726   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3727   *newmat = M;
3728 
3729   /* save submatrix used in processor for next request */
3730   if (call ==  MAT_INITIAL_MATRIX) {
3731     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3732     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3733   }
3734 
3735   PetscFunctionReturn(0);
3736 }
3737 
3738 EXTERN_C_BEGIN
3739 #undef __FUNCT__
3740 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3741 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3742 {
3743   PetscInt       m,cstart, cend,j,nnz,i,d;
3744   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3745   const PetscInt *JJ;
3746   PetscScalar    *values;
3747   PetscErrorCode ierr;
3748 
3749   PetscFunctionBegin;
3750   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3751 
3752   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3753   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3754   m      = B->rmap->n;
3755   cstart = B->cmap->rstart;
3756   cend   = B->cmap->rend;
3757   rstart = B->rmap->rstart;
3758 
3759   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3760 
3761 #if defined(PETSC_USE_DEBUG)
3762   for (i=0; i<m; i++) {
3763     nnz     = Ii[i+1]- Ii[i];
3764     JJ      = J + Ii[i];
3765     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3766     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3767     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3768   }
3769 #endif
3770 
3771   for (i=0; i<m; i++) {
3772     nnz     = Ii[i+1]- Ii[i];
3773     JJ      = J + Ii[i];
3774     nnz_max = PetscMax(nnz_max,nnz);
3775     d       = 0;
3776     for (j=0; j<nnz; j++) {
3777       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3778     }
3779     d_nnz[i] = d;
3780     o_nnz[i] = nnz - d;
3781   }
3782   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3783   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3784 
3785   if (v) values = (PetscScalar*)v;
3786   else {
3787     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3788     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3789   }
3790 
3791   for (i=0; i<m; i++) {
3792     ii   = i + rstart;
3793     nnz  = Ii[i+1]- Ii[i];
3794     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3795   }
3796   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3797   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3798 
3799   if (!v) {
3800     ierr = PetscFree(values);CHKERRQ(ierr);
3801   }
3802   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3803   PetscFunctionReturn(0);
3804 }
3805 EXTERN_C_END
3806 
3807 #undef __FUNCT__
3808 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3809 /*@
3810    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3811    (the default parallel PETSc format).
3812 
3813    Collective on MPI_Comm
3814 
3815    Input Parameters:
3816 +  B - the matrix
3817 .  i - the indices into j for the start of each local row (starts with zero)
3818 .  j - the column indices for each local row (starts with zero)
3819 -  v - optional values in the matrix
3820 
3821    Level: developer
3822 
3823    Notes:
3824        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3825      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3826      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3827 
3828        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3829 
3830        The format which is used for the sparse matrix input is equivalent to a
3831     row-major ordering, i.e., for the following matrix the input data expected is
3832     as shown:
3833 
3834         1 0 0
3835         2 0 3     P0
3836        -------
3837         4 5 6     P1
3838 
3839      Process0 [P0]: rows_owned=[0,1]
3840         i =  {0,1,3}  [size = nrow+1  = 2+1]
3841         j =  {0,0,2}  [size = nz = 3]
3842         v =  {1,2,3}  [size = nz = 3]
3843 
3844      Process1 [P1]: rows_owned=[2]
3845         i =  {0,3}    [size = nrow+1  = 1+1]
3846         j =  {0,1,2}  [size = nz = 3]
3847         v =  {4,5,6}  [size = nz = 3]
3848 
3849 .keywords: matrix, aij, compressed row, sparse, parallel
3850 
3851 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3852           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3853 @*/
3854 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3855 {
3856   PetscErrorCode ierr;
3857 
3858   PetscFunctionBegin;
3859   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3860   PetscFunctionReturn(0);
3861 }
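/*
   A minimal sketch (illustrative only; the variable names are not part of the PETSc API) of
   driving MatMPIAIJSetPreallocationCSR() with the two-process 3x3 example from the manual
   page above, where rank is the MPI rank in PETSC_COMM_WORLD:

     Mat         A;
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};    local CSR of rank 0 (rows 0-1)
     PetscScalar v0[] = {1.0,2.0,3.0};
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};    local CSR of rank 1 (row 2)
     PetscScalar v1[] = {4.0,5.0,6.0};

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,!rank ? 2 : 1,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     if (!rank) {ierr = MatMPIAIJSetPreallocationCSR(A,i0,j0,v0);CHKERRQ(ierr);}
     else       {ierr = MatMPIAIJSetPreallocationCSR(A,i1,j1,v1);CHKERRQ(ierr);}

   As the implementation above shows, this routine also inserts the values and assembles the
   matrix, so no separate MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() calls are needed.
*/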
3862 
3863 #undef __FUNCT__
3864 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3865 /*@C
3866    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3867    (the default parallel PETSc format).  For good matrix assembly performance
3868    the user should preallocate the matrix storage by setting the parameters
3869    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3870    performance can be increased by more than a factor of 50.
3871 
3872    Collective on MPI_Comm
3873 
3874    Input Parameters:
3875 +  A - the matrix
3876 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3877            (same value is used for all local rows)
3878 .  d_nnz - array containing the number of nonzeros in the various rows of the
3879            DIAGONAL portion of the local submatrix (possibly different for each row)
3880            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3881            The size of this array is equal to the number of local rows, i.e 'm'.
3882            For matrices that will be factored, you must leave room for (and set)
3883            the diagonal entry even if it is zero.
3884 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3885            submatrix (same value is used for all local rows).
3886 -  o_nnz - array containing the number of nonzeros in the various rows of the
3887            OFF-DIAGONAL portion of the local submatrix (possibly different for
3888            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3889            structure. The size of this array is equal to the number
3890            of local rows, i.e 'm'.
3891 
3892    If the *_nnz parameter is given then the *_nz parameter is ignored
3893 
3894    The AIJ format (also called the Yale sparse matrix format or
3895    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3896    storage.  The stored row and column indices begin with zero.
3897    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3898 
3899    The parallel matrix is partitioned such that the first m0 rows belong to
3900    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3901    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3902 
3903    The DIAGONAL portion of the local submatrix of a processor can be defined
3904    as the submatrix which is obtained by extracting the part corresponding to
3905    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3906    first row that belongs to the processor, r2 is the last row belonging to
3907    this processor, and c1-c2 is the range of indices of the local part of a
3908    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3909    common case of a square matrix, the row and column ranges are the same and
3910    the DIAGONAL part is also square. The remaining portion of the local
3911    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3912 
3913    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3914 
3915    You can call MatGetInfo() to get information on how effective the preallocation was;
3916    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3917    You can also run with the option -info and look for messages with the string
3918    malloc in them to see if additional memory allocation was needed.
3919 
3920    Example usage:
3921 
3922    Consider the following 8x8 matrix with 34 nonzero values that is
3923    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3924    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3925    as follows:
3926 
3927 .vb
3928             1  2  0  |  0  3  0  |  0  4
3929     Proc0   0  5  6  |  7  0  0  |  8  0
3930             9  0 10  | 11  0  0  | 12  0
3931     -------------------------------------
3932            13  0 14  | 15 16 17  |  0  0
3933     Proc1   0 18  0  | 19 20 21  |  0  0
3934             0  0  0  | 22 23  0  | 24  0
3935     -------------------------------------
3936     Proc2  25 26 27  |  0  0 28  | 29  0
3937            30  0  0  | 31 32 33  |  0 34
3938 .ve
3939 
3940    This can be represented as a collection of submatrices as:
3941 
3942 .vb
3943       A B C
3944       D E F
3945       G H I
3946 .ve
3947 
3948    Where the submatrices A,B,C are owned by proc0, D,E,F are
3949    owned by proc1, G,H,I are owned by proc2.
3950 
3951    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3952    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3953    The 'M','N' parameters are 8,8, and have the same values on all procs.
3954 
3955    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3956    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3957    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3958    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3959    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3960    matrix and [DF] as another SeqAIJ matrix.
3961 
3962    When d_nz, o_nz parameters are specified, d_nz storage elements are
3963    allocated for every row of the local diagonal submatrix, and o_nz
3964    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3965    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3966    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3967    In this case, the values of d_nz,o_nz are:
3968 .vb
3969      proc0 : dnz = 2, o_nz = 2
3970      proc1 : dnz = 3, o_nz = 2
3971      proc2 : dnz = 1, o_nz = 4
3972 .ve
3973    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3974    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3975    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3976    34 values.
3977 
3978    When d_nnz, o_nnz parameters are specified, the storage is specified
3979    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3980    In the above case the values for d_nnz,o_nnz are:
3981 .vb
3982      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3983      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3984      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3985 .ve
3986    Here the space allocated is the sum of all the above values, i.e. 34, and
3987    hence the preallocation is perfect.
3988 
3989    Level: intermediate
3990 
3991 .keywords: matrix, aij, compressed row, sparse, parallel
3992 
3993 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3994           MPIAIJ, MatGetInfo()
3995 @*/
3996 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3997 {
3998   PetscErrorCode ierr;
3999 
4000   PetscFunctionBegin;
4001   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4002   PetscValidType(B,1);
4003   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4004   PetscFunctionReturn(0);
4005 }
4006 
4007 #undef __FUNCT__
4008 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4009 /*@
4010     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4011         in standard CSR format.
4012 
4013    Collective on MPI_Comm
4014 
4015    Input Parameters:
4016 +  comm - MPI communicator
4017 .  m - number of local rows (Cannot be PETSC_DECIDE)
4018 .  n - This value should be the same as the local size used in creating the
4019        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4020        calculated if N is given). For square matrices n is almost always m.
4021 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4022 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4023 .   i - row indices
4024 .   j - column indices
4025 -   a - matrix values
4026 
4027    Output Parameter:
4028 .   mat - the matrix
4029 
4030    Level: intermediate
4031 
4032    Notes:
4033        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4034      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4035      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4036 
4037        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4038 
4039        The format which is used for the sparse matrix input, is equivalent to a
4040     row-major ordering.. i.e for the following matrix, the input data expected is
4041     as shown:
4042 
4043         1 0 0
4044         2 0 3     P0
4045        -------
4046         4 5 6     P1
4047 
4048      Process0 [P0]: rows_owned=[0,1]
4049         i =  {0,1,3}  [size = nrow+1  = 2+1]
4050         j =  {0,0,2}  [size = nz = 3]
4051         v =  {1,2,3}  [size = nz = 3]
4052 
4053      Process1 [P1]: rows_owned=[2]
4054         i =  {0,3}    [size = nrow+1  = 1+1]
4055         j =  {0,1,2}  [size = nz = 3]
4056         v =  {4,5,6}  [size = nz = 3]
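
     A sketch of the corresponding calls for this example (variable names are
     illustrative; error checking is omitted) is:
.vb
     /* on process 0: 2 local rows, 3 local nonzeros */
     PetscInt    i0[] = {0,1,3},j0[] = {0,0,2};
     PetscScalar v0[] = {1,2,3};
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i0,j0,v0,&mat);

     /* on process 1: 1 local row, 3 local nonzeros */
     PetscInt    i1[] = {0,3},j1[] = {0,1,2};
     PetscScalar v1[] = {4,5,6};
     MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i1,j1,v1,&mat);
.ve
     Each process makes exactly one (collective) call, passing only its own local arrays.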
4057 
4058 .keywords: matrix, aij, compressed row, sparse, parallel
4059 
4060 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4061           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4062 @*/
4063 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4064 {
4065   PetscErrorCode ierr;
4066 
4067   PetscFunctionBegin;
4068   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4069   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4070   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4071   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4072   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4073   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4074   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4075   PetscFunctionReturn(0);
4076 }
4077 
4078 #undef __FUNCT__
4079 #define __FUNCT__ "MatCreateAIJ"
4080 /*@C
4081    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4082    (the default parallel PETSc format).  For good matrix assembly performance
4083    the user should preallocate the matrix storage by setting the parameters
4084    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4085    performance can be increased by more than a factor of 50.
4086 
4087    Collective on MPI_Comm
4088 
4089    Input Parameters:
4090 +  comm - MPI communicator
4091 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4092            This value should be the same as the local size used in creating the
4093            y vector for the matrix-vector product y = Ax.
4094 .  n - This value should be the same as the local size used in creating the
4095        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4096        calculated if N is given). For square matrices n is almost always m.
4097 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4098 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4099 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4100            (same value is used for all local rows)
4101 .  d_nnz - array containing the number of nonzeros in the various rows of the
4102            DIAGONAL portion of the local submatrix (possibly different for each row)
4103            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4104            The size of this array is equal to the number of local rows, i.e. 'm'.
4105 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4106            submatrix (same value is used for all local rows).
4107 -  o_nnz - array containing the number of nonzeros in the various rows of the
4108            OFF-DIAGONAL portion of the local submatrix (possibly different for
4109            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4110            structure. The size of this array is equal to the number
4111            of local rows, i.e. 'm'.
4112 
4113    Output Parameter:
4114 .  A - the matrix
4115 
4116    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4117    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4118    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4119 
4120    Notes:
4121    If the *_nnz parameter is given then the *_nz parameter is ignored
4122 
4123    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4124    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4125    storage requirements for this matrix.
4126 
4127    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4128    processor then it must be used on all processors that share the object for
4129    that argument.
4130 
4131    The user MUST specify either the local or global matrix dimensions
4132    (possibly both).
4133 
4134    The parallel matrix is partitioned across processors such that the
4135    first m0 rows belong to process 0, the next m1 rows belong to
4136    process 1, the next m2 rows belong to process 2, etc., where
4137    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4138    values corresponding to an [m x N] submatrix.
4139 
4140    The columns are logically partitioned with the n0 columns belonging
4141    to the 0th partition, the next n1 columns belonging to the next
4142    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4143 
4144    The DIAGONAL portion of the local submatrix on any given processor
4145    is the submatrix corresponding to the rows and columns m,n
4146    owned by the given processor, i.e. the diagonal submatrix on
4147    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4148    etc. The remaining portion of the local submatrix [m x (N-n)]
4149    constitutes the OFF-DIAGONAL portion. The example below better
4150    illustrates this concept.
4151 
4152    For a square global matrix we define each processor's diagonal portion
4153    to be its local rows and the corresponding columns (a square submatrix);
4154    each processor's off-diagonal portion encompasses the remainder of the
4155    local matrix (a rectangular submatrix).
4156 
4157    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4158 
4159    When calling this routine with a single process communicator, a matrix of
4160    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4161    type of communicator, use the construction mechanism:
4162      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4163 
4164    By default, this format uses inodes (identical nodes) when possible.
4165    We search for consecutive rows with the same nonzero structure, thereby
4166    reusing matrix information to achieve increased efficiency.
4167 
4168    Options Database Keys:
4169 +  -mat_no_inode  - Do not use inodes
4170 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4171 -  -mat_aij_oneindex - Internally use indexing starting at 1
4172         rather than 0.  Note that when calling MatSetValues(),
4173         the user still MUST index entries starting at 0!
4174 
4175 
4176    Example usage:
4177 
4178    Consider the following 8x8 matrix with 34 non-zero values that is
4179    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4180    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4181    as follows:
4182 
4183 .vb
4184             1  2  0  |  0  3  0  |  0  4
4185     Proc0   0  5  6  |  7  0  0  |  8  0
4186             9  0 10  | 11  0  0  | 12  0
4187     -------------------------------------
4188            13  0 14  | 15 16 17  |  0  0
4189     Proc1   0 18  0  | 19 20 21  |  0  0
4190             0  0  0  | 22 23  0  | 24  0
4191     -------------------------------------
4192     Proc2  25 26 27  |  0  0 28  | 29  0
4193            30  0  0  | 31 32 33  |  0 34
4194 .ve
4195 
4196    This can be represented as a collection of submatrices as:
4197 
4198 .vb
4199       A B C
4200       D E F
4201       G H I
4202 .ve
4203 
4204    The submatrices A,B,C are owned by proc0, D,E,F are
4205    owned by proc1, and G,H,I are owned by proc2.
4206 
4207    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4208    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4209    The 'M','N' parameters are 8,8, and have the same values on all procs.
4210 
4211    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4212    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4213    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4214    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4215    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4216    matrix, and [DF] as another SeqAIJ matrix.
4217 
4218    When d_nz, o_nz parameters are specified, d_nz storage elements are
4219    allocated for every row of the local diagonal submatrix, and o_nz
4220    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4221    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4222    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4223    In this case, the values of d_nz,o_nz are:
4224 .vb
4225      proc0 : d_nz = 2, o_nz = 2
4226      proc1 : d_nz = 3, o_nz = 2
4227      proc2 : d_nz = 1, o_nz = 4
4228 .ve
4229    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4230    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4231    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4232    34 values.
4233 
4234    When d_nnz, o_nnz parameters are specified, the storage is specified
4235    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4236    In the above case the values for d_nnz,o_nnz are:
4237 .vb
4238      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4239      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4240      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4241 .ve
4242    Here the space allocated is the sum of all the above values, i.e. 34, and
4243    hence pre-allocation is perfect.
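
   Equivalently, the same matrix could be created with a single call to this routine;
   for example proc1 (a sketch only; comm is assumed to exist and the MatSetValues()/
   assembly calls are omitted) would use:
.vb
     Mat      A;
     PetscInt d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};   /* the proc1 rows of the example */

     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve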
4244 
4245    Level: intermediate
4246 
4247 .keywords: matrix, aij, compressed row, sparse, parallel
4248 
4249 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4250           MPIAIJ, MatCreateMPIAIJWithArrays()
4251 @*/
4252 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4253 {
4254   PetscErrorCode ierr;
4255   PetscMPIInt    size;
4256 
4257   PetscFunctionBegin;
4258   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4259   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4260   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4261   if (size > 1) {
4262     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4263     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4264   } else {
4265     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4266     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4267   }
4268   PetscFunctionReturn(0);
4269 }
4270 
4271 #undef __FUNCT__
4272 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4273 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4274 {
4275   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4276 
4277   PetscFunctionBegin;
4278   *Ad     = a->A;
4279   *Ao     = a->B;
4280   *colmap = a->garray;
4281   PetscFunctionReturn(0);
4282 }
4283 
4284 #undef __FUNCT__
4285 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4286 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4287 {
4288   PetscErrorCode ierr;
4289   PetscInt       i;
4290   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4291 
4292   PetscFunctionBegin;
4293   if (coloring->ctype == IS_COLORING_GLOBAL) {
4294     ISColoringValue *allcolors,*colors;
4295     ISColoring      ocoloring;
4296 
4297     /* set coloring for diagonal portion */
4298     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4299 
4300     /* set coloring for off-diagonal portion */
4301     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4302     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4303     for (i=0; i<a->B->cmap->n; i++) {
4304       colors[i] = allcolors[a->garray[i]];
4305     }
4306     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4307     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4308     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4309     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4310   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4311     ISColoringValue *colors;
4312     PetscInt        *larray;
4313     ISColoring      ocoloring;
4314 
4315     /* set coloring for diagonal portion */
4316     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4317     for (i=0; i<a->A->cmap->n; i++) {
4318       larray[i] = i + A->cmap->rstart;
4319     }
4320     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4321     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4322     for (i=0; i<a->A->cmap->n; i++) {
4323       colors[i] = coloring->colors[larray[i]];
4324     }
4325     ierr = PetscFree(larray);CHKERRQ(ierr);
4326     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4327     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4328     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4329 
4330     /* set coloring for off-diagonal portion */
4331     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4332     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4333     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4334     for (i=0; i<a->B->cmap->n; i++) {
4335       colors[i] = coloring->colors[larray[i]];
4336     }
4337     ierr = PetscFree(larray);CHKERRQ(ierr);
4338     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4339     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4340     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4341   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4342 
4343   PetscFunctionReturn(0);
4344 }
4345 
4346 #if defined(PETSC_HAVE_ADIC)
4347 #undef __FUNCT__
4348 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4349 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4350 {
4351   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4352   PetscErrorCode ierr;
4353 
4354   PetscFunctionBegin;
4355   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4356   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4357   PetscFunctionReturn(0);
4358 }
4359 #endif
4360 
4361 #undef __FUNCT__
4362 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4363 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4364 {
4365   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4366   PetscErrorCode ierr;
4367 
4368   PetscFunctionBegin;
4369   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4370   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4371   PetscFunctionReturn(0);
4372 }
4373 
4374 #undef __FUNCT__
4375 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4376 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4377 {
4378   PetscErrorCode ierr;
4379   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4380   PetscInt       *indx;
4381 
4382   PetscFunctionBegin;
4383   /* This routine will ONLY return MPIAIJ type matrix */
4384   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4385   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4386   if (n == PETSC_DECIDE){
4387     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4388   }
4389   /* Check sum(n) = N */
4390   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4391   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4392 
4393   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4394   rstart -= m;
4395 
4396   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4397   for (i=0;i<m;i++) {
4398     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4399     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4400     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4401   }
4402 
4403   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4404   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4405   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4406   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4407   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4408   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4409   PetscFunctionReturn(0);
4410 }
4411 
4412 #undef __FUNCT__
4413 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4414 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4415 {
4416   PetscErrorCode ierr;
4417   PetscInt       m,N,i,rstart,nnz,Ii;
4418   PetscInt       *indx;
4419   PetscScalar    *values;
4420 
4421   PetscFunctionBegin;
4422   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4423   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4424   for (i=0;i<m;i++) {
4425     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4426     Ii    = i + rstart;
4427     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4428     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4429   }
4430   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4431   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4432   PetscFunctionReturn(0);
4433 }
4434 
4435 #undef __FUNCT__
4436 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4437 /*@
4438       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4439                  matrices from each processor
4440 
4441     Collective on MPI_Comm
4442 
4443    Input Parameters:
4444 +    comm - the communicator the parallel matrix will live on
4445 .    inmat - the input sequential matrix (one per process)
4446 .    n - number of local columns (or PETSC_DECIDE)
4447 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4448 
4449    Output Parameter:
4450 .    outmat - the parallel matrix generated
4451 
4452     Level: advanced
4453 
4454    Notes: The number of columns of the matrix in EACH processor MUST be the same.
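
   For example (a sketch only; each process is assumed to already hold its own SeqAIJ
   matrix localmat with the same number of columns, and error checking is omitted):
.vb
     Mat globalmat;
     MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalmat);
.ve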
4455 
4456 @*/
4457 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4458 {
4459   PetscErrorCode ierr;
4460 
4461   PetscFunctionBegin;
4462   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4463   if (scall == MAT_INITIAL_MATRIX){
4464     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4465   }
4466   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4467   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4468   PetscFunctionReturn(0);
4469 }
4470 
4471 #undef __FUNCT__
4472 #define __FUNCT__ "MatFileSplit"
4473 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4474 {
4475   PetscErrorCode    ierr;
4476   PetscMPIInt       rank;
4477   PetscInt          m,N,i,rstart,nnz;
4478   size_t            len;
4479   const PetscInt    *indx;
4480   PetscViewer       out;
4481   char              *name;
4482   Mat               B;
4483   const PetscScalar *values;
4484 
4485   PetscFunctionBegin;
4486   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4487   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4488   /* Should this be the type of the diagonal block of A? */
4489   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4490   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4491   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4492   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4493   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4494   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4495   for (i=0;i<m;i++) {
4496     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4497     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4498     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4499   }
4500   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4501   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4502 
4503   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4504   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4505   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4506   sprintf(name,"%s.%d",outfile,rank);
4507   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4508   ierr = PetscFree(name);CHKERRQ(ierr);
4509   ierr = MatView(B,out);CHKERRQ(ierr);
4510   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4511   ierr = MatDestroy(&B);CHKERRQ(ierr);
4512   PetscFunctionReturn(0);
4513 }
4514 
4515 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4516 #undef __FUNCT__
4517 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4518 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4519 {
4520   PetscErrorCode       ierr;
4521   Mat_Merge_SeqsToMPI  *merge;
4522   PetscContainer       container;
4523 
4524   PetscFunctionBegin;
4525   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4526   if (container) {
4527     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4528     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4529     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4530     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4531     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4532     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4533     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4534     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4535     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4536     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4537     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4538     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4539     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4540     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4541     ierr = PetscFree(merge);CHKERRQ(ierr);
4542     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4543   }
4544   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4545   PetscFunctionReturn(0);
4546 }
4547 
4548 #include <../src/mat/utils/freespace.h>
4549 #include <petscbt.h>
4550 
4551 #undef __FUNCT__
4552 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4553 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4554 {
4555   PetscErrorCode       ierr;
4556   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4557   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4558   PetscMPIInt          size,rank,taga,*len_s;
4559   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4560   PetscInt             proc,m;
4561   PetscInt             **buf_ri,**buf_rj;
4562   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4563   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4564   MPI_Request          *s_waits,*r_waits;
4565   MPI_Status           *status;
4566   MatScalar            *aa=a->a;
4567   MatScalar            **abuf_r,*ba_i;
4568   Mat_Merge_SeqsToMPI  *merge;
4569   PetscContainer       container;
4570 
4571   PetscFunctionBegin;
4572   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4573 
4574   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4575   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4576 
4577   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4578   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4579 
4580   bi     = merge->bi;
4581   bj     = merge->bj;
4582   buf_ri = merge->buf_ri;
4583   buf_rj = merge->buf_rj;
4584 
4585   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4586   owners = merge->rowmap->range;
4587   len_s  = merge->len_s;
4588 
4589   /* send and recv matrix values */
4590   /*-----------------------------*/
4591   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4592   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4593 
4594   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4595   for (proc=0,k=0; proc<size; proc++){
4596     if (!len_s[proc]) continue;
4597     i = owners[proc];
4598     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4599     k++;
4600   }
4601 
4602   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4603   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4604   ierr = PetscFree(status);CHKERRQ(ierr);
4605 
4606   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4607   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4608 
4609   /* insert mat values of mpimat */
4610   /*----------------------------*/
4611   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4612   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4613 
4614   for (k=0; k<merge->nrecv; k++){
4615     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4616     nrows = *(buf_ri_k[k]);
4617     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4618     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4619   }
4620 
4621   /* set values of ba */
4622   m = merge->rowmap->n;
4623   for (i=0; i<m; i++) {
4624     arow = owners[rank] + i;
4625     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4626     bnzi = bi[i+1] - bi[i];
4627     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4628 
4629     /* add local non-zero vals of this proc's seqmat into ba */
4630     anzi = ai[arow+1] - ai[arow];
4631     aj   = a->j + ai[arow];
4632     aa   = a->a + ai[arow];
4633     nextaj = 0;
4634     for (j=0; nextaj<anzi; j++){
4635       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4636         ba_i[j] += aa[nextaj++];
4637       }
4638     }
4639 
4640     /* add received vals into ba */
4641     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4642       /* i-th row */
4643       if (i == *nextrow[k]) {
4644         anzi = *(nextai[k]+1) - *nextai[k];
4645         aj   = buf_rj[k] + *(nextai[k]);
4646         aa   = abuf_r[k] + *(nextai[k]);
4647         nextaj = 0;
4648         for (j=0; nextaj<anzi; j++){
4649           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4650             ba_i[j] += aa[nextaj++];
4651           }
4652         }
4653         nextrow[k]++; nextai[k]++;
4654       }
4655     }
4656     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4657   }
4658   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4659   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4660 
4661   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4662   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4663   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4664   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4665   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4666   PetscFunctionReturn(0);
4667 }
4668 
4669 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4670 
4671 #undef __FUNCT__
4672 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4673 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4674 {
4675   PetscErrorCode       ierr;
4676   Mat                  B_mpi;
4677   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4678   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4679   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4680   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4681   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4682   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4683   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4684   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4685   MPI_Status           *status;
4686   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4687   PetscBT              lnkbt;
4688   Mat_Merge_SeqsToMPI  *merge;
4689   PetscContainer       container;
4690 
4691   PetscFunctionBegin;
4692   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4693 
4694   /* make sure it is a PETSc comm */
4695   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4696   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4697   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4698 
4699   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4700   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4701 
4702   /* determine row ownership */
4703   /*---------------------------------------------------------*/
4704   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4705   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4706   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4707   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4708   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4709   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4710   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4711 
4712   m      = merge->rowmap->n;
4713   M      = merge->rowmap->N;
4714   owners = merge->rowmap->range;
4715 
4716   /* determine the number of messages to send, their lengths */
4717   /*---------------------------------------------------------*/
4718   len_s  = merge->len_s;
4719 
4720   len = 0;  /* length of buf_si[] */
4721   merge->nsend = 0;
4722   for (proc=0; proc<size; proc++){
4723     len_si[proc] = 0;
4724     if (proc == rank){
4725       len_s[proc] = 0;
4726     } else {
4727       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4728       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4729     }
4730     if (len_s[proc]) {
4731       merge->nsend++;
4732       nrows = 0;
4733       for (i=owners[proc]; i<owners[proc+1]; i++){
4734         if (ai[i+1] > ai[i]) nrows++;
4735       }
4736       len_si[proc] = 2*(nrows+1);
4737       len += len_si[proc];
4738     }
4739   }
4740 
4741   /* determine the number and length of messages to receive for ij-structure */
4742   /*-------------------------------------------------------------------------*/
4743   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4744   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4745 
4746   /* post the Irecv of j-structure */
4747   /*-------------------------------*/
4748   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4749   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4750 
4751   /* post the Isend of j-structure */
4752   /*--------------------------------*/
4753   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4754 
4755   for (proc=0, k=0; proc<size; proc++){
4756     if (!len_s[proc]) continue;
4757     i = owners[proc];
4758     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4759     k++;
4760   }
4761 
4762   /* receives and sends of j-structure are complete */
4763   /*------------------------------------------------*/
4764   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4765   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4766 
4767   /* send and recv i-structure */
4768   /*---------------------------*/
4769   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4770   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4771 
4772   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4773   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4774   for (proc=0,k=0; proc<size; proc++){
4775     if (!len_s[proc]) continue;
4776     /* form outgoing message for i-structure:
4777          buf_si[0]:                 nrows to be sent
4778                [1:nrows]:           row index (global)
4779                [nrows+1:2*nrows+1]: i-structure index
4780     */
4781     /*-------------------------------------------*/
4782     nrows = len_si[proc]/2 - 1;
4783     buf_si_i    = buf_si + nrows+1;
4784     buf_si[0]   = nrows;
4785     buf_si_i[0] = 0;
4786     nrows = 0;
4787     for (i=owners[proc]; i<owners[proc+1]; i++){
4788       anzi = ai[i+1] - ai[i];
4789       if (anzi) {
4790         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4791         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4792         nrows++;
4793       }
4794     }
4795     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4796     k++;
4797     buf_si += len_si[proc];
4798   }
4799 
4800   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4801   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4802 
4803   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4804   for (i=0; i<merge->nrecv; i++){
4805     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4806   }
4807 
4808   ierr = PetscFree(len_si);CHKERRQ(ierr);
4809   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4810   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4811   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4812   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4813   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4814   ierr = PetscFree(status);CHKERRQ(ierr);
4815 
4816   /* compute a local seq matrix in each processor */
4817   /*----------------------------------------------*/
4818   /* allocate bi array and free space for accumulating nonzero column info */
4819   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4820   bi[0] = 0;
4821 
4822   /* create and initialize a linked list */
4823   nlnk = N+1;
4824   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4825 
4826   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4827   len = 0;
4828   len  = ai[owners[rank+1]] - ai[owners[rank]];
4829   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4830   current_space = free_space;
4831 
4832   /* determine symbolic info for each local row */
4833   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4834 
4835   for (k=0; k<merge->nrecv; k++){
4836     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4837     nrows = *buf_ri_k[k];
4838     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4839     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4840   }
4841 
4842   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4843   len = 0;
4844   for (i=0;i<m;i++) {
4845     bnzi   = 0;
4846     /* add local non-zero cols of this proc's seqmat into lnk */
4847     arow   = owners[rank] + i;
4848     anzi   = ai[arow+1] - ai[arow];
4849     aj     = a->j + ai[arow];
4850     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4851     bnzi += nlnk;
4852     /* add received col data into lnk */
4853     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4854       if (i == *nextrow[k]) { /* i-th row */
4855         anzi = *(nextai[k]+1) - *nextai[k];
4856         aj   = buf_rj[k] + *nextai[k];
4857         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4858         bnzi += nlnk;
4859         nextrow[k]++; nextai[k]++;
4860       }
4861     }
4862     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4863 
4864     /* if free space is not available, make more free space */
4865     if (current_space->local_remaining<bnzi) {
4866       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4867       nspacedouble++;
4868     }
4869     /* copy data into free space, then initialize lnk */
4870     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4871     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4872 
4873     current_space->array           += bnzi;
4874     current_space->local_used      += bnzi;
4875     current_space->local_remaining -= bnzi;
4876 
4877     bi[i+1] = bi[i] + bnzi;
4878   }
4879 
4880   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4881 
4882   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4883   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4884   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4885 
4886   /* create symbolic parallel matrix B_mpi */
4887   /*---------------------------------------*/
4888   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4889   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4890   if (n==PETSC_DECIDE) {
4891     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4892   } else {
4893     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4894   }
4895   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4896   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4897   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4898   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4899   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4900 
4901   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4902   B_mpi->assembled     = PETSC_FALSE;
4903   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4904   merge->bi            = bi;
4905   merge->bj            = bj;
4906   merge->buf_ri        = buf_ri;
4907   merge->buf_rj        = buf_rj;
4908   merge->coi           = PETSC_NULL;
4909   merge->coj           = PETSC_NULL;
4910   merge->owners_co     = PETSC_NULL;
4911 
4912   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4913 
4914   /* attach the supporting struct to B_mpi for reuse */
4915   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4916   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4917   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4918   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4919   *mpimat = B_mpi;
4920 
4921   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4922   PetscFunctionReturn(0);
4923 }
4924 
4925 #undef __FUNCT__
4926 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4927 /*@C
4928       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
4929                  matrices from each processor
4930 
4931     Collective on MPI_Comm
4932 
4933    Input Parameters:
4934 +    comm - the communicator the parallel matrix will live on
4935 .    seqmat - the input sequential matrix (one per process)
4936 .    m - number of local rows (or PETSC_DECIDE)
4937 .    n - number of local columns (or PETSC_DECIDE)
4938 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4939 
4940    Output Parameter:
4941 .    mpimat - the parallel matrix generated
4942 
4943     Level: advanced
4944 
4945    Notes:
4946      The dimensions of the sequential matrix in each processor MUST be the same.
4947      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4948      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
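
     For example (a sketch only; each process is assumed to already hold its own SeqAIJ
     matrix seqmat of identical dimensions, and error checking is omitted):
.vb
     Mat mpimat;
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
.ve
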
4949 @*/
4950 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4951 {
4952   PetscErrorCode   ierr;
4953   PetscMPIInt     size;
4954 
4955   PetscFunctionBegin;
4956   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4957   if (size == 1){
4958      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4959      if (scall == MAT_INITIAL_MATRIX){
4960        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4961      } else {
4962        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4963      }
4964      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4965      PetscFunctionReturn(0);
4966   }
4967   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4968   if (scall == MAT_INITIAL_MATRIX){
4969     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4970   }
4971   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4972   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4973   PetscFunctionReturn(0);
4974 }
4975 
4976 #undef __FUNCT__
4977 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4978 /*@
4979     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4980           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4981           with MatGetSize()
4982 
4983     Not Collective
4984 
4985    Input Parameters:
4986 +    A - the matrix
4987 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4988 
4989    Output Parameter:
4990 .    A_loc - the local sequential matrix generated
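
    Example usage (a sketch only; error checking omitted):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc as an ordinary SeqAIJ matrix ... */
     MatDestroy(&A_loc);
.ve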
4991 
4992     Level: developer
4993 
4994 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4995 
4996 @*/
4997 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4998 {
4999   PetscErrorCode  ierr;
5000   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
5001   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5002   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5003   MatScalar       *aa=a->a,*ba=b->a,*cam;
5004   PetscScalar     *ca;
5005   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5006   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
5007   PetscBool       match;
5008 
5009   PetscFunctionBegin;
5010   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5011   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5012   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5013   if (scall == MAT_INITIAL_MATRIX){
5014     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5015     ci[0] = 0;
5016     for (i=0; i<am; i++){
5017       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5018     }
5019     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5020     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5021     k = 0;
5022     for (i=0; i<am; i++) {
5023       ncols_o = bi[i+1] - bi[i];
5024       ncols_d = ai[i+1] - ai[i];
5025       /* off-diagonal portion of A */
5026       for (jo=0; jo<ncols_o; jo++) {
5027         col = cmap[*bj];
5028         if (col >= cstart) break;
5029         cj[k]   = col; bj++;
5030         ca[k++] = *ba++;
5031       }
5032       /* diagonal portion of A */
5033       for (j=0; j<ncols_d; j++) {
5034         cj[k]   = cstart + *aj++;
5035         ca[k++] = *aa++;
5036       }
5037       /* off-diagonal portion of A */
5038       for (j=jo; j<ncols_o; j++) {
5039         cj[k]   = cmap[*bj++];
5040         ca[k++] = *ba++;
5041       }
5042     }
5043     /* put together the new matrix */
5044     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5045     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5046     /* Since these are PETSc arrays, change flags to free them as necessary. */
5047     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5048     mat->free_a  = PETSC_TRUE;
5049     mat->free_ij = PETSC_TRUE;
5050     mat->nonew   = 0;
5051   } else if (scall == MAT_REUSE_MATRIX){
5052     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5053     ci = mat->i; cj = mat->j; cam = mat->a;
5054     for (i=0; i<am; i++) {
5055       /* off-diagonal portion of A */
5056       ncols_o = bi[i+1] - bi[i];
5057       for (jo=0; jo<ncols_o; jo++) {
5058         col = cmap[*bj];
5059         if (col >= cstart) break;
5060         *cam++ = *ba++; bj++;
5061       }
5062       /* diagonal portion of A */
5063       ncols_d = ai[i+1] - ai[i];
5064       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5065       /* off-diagonal portion of A */
5066       for (j=jo; j<ncols_o; j++) {
5067         *cam++ = *ba++; bj++;
5068       }
5069     }
5070   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5071   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5072   PetscFunctionReturn(0);
5073 }
5074 
5075 #undef __FUNCT__
5076 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5077 /*@C
5078      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5079 
5080     Not Collective
5081 
5082    Input Parameters:
5083 +    A - the matrix
5084 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5085 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5086 
5087    Output Parameter:
5088 .    A_loc - the local sequential matrix generated
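
    Example usage (a sketch only; passing PETSC_NULL for row and col extracts all local
    rows and all nonzero columns, and error checking is omitted):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve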
5089 
5090     Level: developer
5091 
5092 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5093 
5094 @*/
5095 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5096 {
5097   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5098   PetscErrorCode    ierr;
5099   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5100   IS                isrowa,iscola;
5101   Mat               *aloc;
5102   PetscBool       match;
5103 
5104   PetscFunctionBegin;
5105   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5106   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5107   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5108   if (!row){
5109     start = A->rmap->rstart; end = A->rmap->rend;
5110     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5111   } else {
5112     isrowa = *row;
5113   }
5114   if (!col){
5115     start = A->cmap->rstart;
5116     cmap  = a->garray;
5117     nzA   = a->A->cmap->n;
5118     nzB   = a->B->cmap->n;
5119     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5120     ncols = 0;
5121     for (i=0; i<nzB; i++) {
5122       if (cmap[i] < start) idx[ncols++] = cmap[i];
5123       else break;
5124     }
5125     imark = i;
5126     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5127     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5128     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5129   } else {
5130     iscola = *col;
5131   }
5132   if (scall != MAT_INITIAL_MATRIX){
5133     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5134     aloc[0] = *A_loc;
5135   }
5136   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5137   *A_loc = aloc[0];
5138   ierr = PetscFree(aloc);CHKERRQ(ierr);
5139   if (!row){
5140     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5141   }
5142   if (!col){
5143     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5144   }
5145   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5146   PetscFunctionReturn(0);
5147 }
5148 
5149 #undef __FUNCT__
5150 #define __FUNCT__ "MatGetBrowsOfAcols"
5151 /*@C
5152     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5153 
5154     Collective on Mat
5155 
5156    Input Parameters:
5157 +    A,B - the matrices in mpiaij format
5158 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5159 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5160 
5161    Output Parameter:
5162 +    rowb, colb - index sets of rows and columns of B to extract
5163 -    B_seq - the sequential matrix generated
5164 
5165     Level: developer
5166 
5167 @*/
5168 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5169 {
5170   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5171   PetscErrorCode    ierr;
5172   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5173   IS                isrowb,iscolb;
5174   Mat               *bseq=PETSC_NULL;
5175 
5176   PetscFunctionBegin;
5177   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5178     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5179   }
5180   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5181 
5182   if (scall == MAT_INITIAL_MATRIX){
5183     start = A->cmap->rstart;
5184     cmap  = a->garray;
5185     nzA   = a->A->cmap->n;
5186     nzB   = a->B->cmap->n;
5187     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5188     ncols = 0;
5189     for (i=0; i<nzB; i++) {  /* row < local row index */
5190       if (cmap[i] < start) idx[ncols++] = cmap[i];
5191       else break;
5192     }
5193     imark = i;
5194     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5195     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5196     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5197     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5198   } else {
5199     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5200     isrowb = *rowb; iscolb = *colb;
5201     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5202     bseq[0] = *B_seq;
5203   }
5204   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5205   *B_seq = bseq[0];
5206   ierr = PetscFree(bseq);CHKERRQ(ierr);
5207   if (!rowb){
5208     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5209   } else {
5210     *rowb = isrowb;
5211   }
5212   if (!colb){
5213     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5214   } else {
5215     *colb = iscolb;
5216   }
5217   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5218   PetscFunctionReturn(0);
5219 }
5220 
5221 #undef __FUNCT__
5222 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5223 /*
5224     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5225     of the OFF-DIAGONAL portion of local A
5226 
5227     Collective on Mat
5228 
5229    Input Parameters:
5230 +    A,B - the matrices in mpiaij format
5231 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5232 
5233    Output Parameter:
5234 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5235 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5236 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5237 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5238 
5239     Level: developer
5240 
5241 */
5242 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5243 {
5244   VecScatter_MPI_General *gen_to,*gen_from;
5245   PetscErrorCode         ierr;
5246   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5247   Mat_SeqAIJ             *b_oth;
5248   VecScatter             ctx=a->Mvctx;
5249   MPI_Comm               comm=((PetscObject)ctx)->comm;
5250   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5251   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5252   PetscScalar            *rvalues,*svalues;
5253   MatScalar              *b_otha,*bufa,*bufA;
5254   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5255   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5256   MPI_Status             *sstatus,rstatus;
5257   PetscMPIInt            jj;
5258   PetscInt               *cols,sbs,rbs;
5259   PetscScalar            *vals;
5260 
5261   PetscFunctionBegin;
5262   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5263     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5264   }
5265   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5266   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5267 
5268   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5269   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5270   rvalues  = gen_from->values; /* holds the length of receiving row */
5271   svalues  = gen_to->values;   /* holds the length of sending row */
5272   nrecvs   = gen_from->n;
5273   nsends   = gen_to->n;
5274 
5275   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5276   srow     = gen_to->indices;   /* local row index to be sent */
5277   sstarts  = gen_to->starts;
5278   sprocs   = gen_to->procs;
5279   sstatus  = gen_to->sstatus;
5280   sbs      = gen_to->bs;
5281   rstarts  = gen_from->starts;
5282   rprocs   = gen_from->procs;
5283   rbs      = gen_from->bs;
5284 
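  /*
     The exchange below proceeds in three phases over the scatter pattern of A->Mvctx:
       (1) row lengths (builds the i-array of B_oth),
       (2) column indices (the j-array),
       (3) numerical values (the a-array).
     With MAT_REUSE_MATRIX only phase (3) is redone, reusing the message layouts saved in
     startsj_s/startsj_r and the send buffer saved in bufa_ptr by the initial call.
  */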
5285   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5286   if (scall == MAT_INITIAL_MATRIX){
5287     /* i-array */
5288     /*---------*/
5289     /*  post receives */
5290     for (i=0; i<nrecvs; i++){
5291       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5292       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5293       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5294     }
5295 
5296     /* pack the outgoing message */
5297     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5298     sstartsj[0] = 0;  rstartsj[0] = 0;
5299     len = 0; /* total length of j or a array to be sent */
5300     k = 0;
5301     for (i=0; i<nsends; i++){
5302       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5303       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5304       for (j=0; j<nrows; j++) {
5305         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5306         for (l=0; l<sbs; l++){
5307           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5308           rowlen[j*sbs+l] = ncols;
5309           len += ncols;
5310           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5311         }
5312         k++;
5313       }
5314       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5315       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5316     }
5317     /* recvs and sends of i-array are completed */
5318     i = nrecvs;
5319     while (i--) {
5320       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5321     }
5322     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5323 
5324     /* allocate buffers for sending j and a arrays */
5325     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5326     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5327 
5328     /* create i-array of B_oth */
5329     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5330     b_othi[0] = 0;
5331     len = 0; /* total length of j or a array to be received */
5332     k = 0;
5333     for (i=0; i<nrecvs; i++){
5334       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5335       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5336       for (j=0; j<nrows; j++) {
5337         b_othi[k+1] = b_othi[k] + rowlen[j];
5338         len += rowlen[j]; k++;
5339       }
5340       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5341     }
5342 
5343     /* allocate space for j and a arrays of B_oth */
5344     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5345     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5346 
5347     /* j-array */
5348     /*---------*/
5349     /*  post receives of j-array */
5350     for (i=0; i<nrecvs; i++){
5351       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5352       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5353     }
5354 
5355     /* pack the outgoing message j-array */
5356     k = 0;
5357     for (i=0; i<nsends; i++){
5358       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5359       bufJ = bufj+sstartsj[i];
5360       for (j=0; j<nrows; j++) {
5361         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5362         for (ll=0; ll<sbs; ll++){
5363           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5364           for (l=0; l<ncols; l++){
5365             *bufJ++ = cols[l];
5366           }
5367           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5368         }
5369       }
5370       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5371     }
5372 
5373     /* recvs and sends of j-array are completed */
5374     i = nrecvs;
5375     while (i--) {
5376       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5377     }
5378     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5379   } else if (scall == MAT_REUSE_MATRIX){
5380     sstartsj = *startsj_s;
5381     rstartsj = *startsj_r;
5382     bufa     = *bufa_ptr;
5383     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5384     b_otha   = b_oth->a;
5385   } else {
5386     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "MatReuse must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5387   }
5388 
5389   /* a-array */
5390   /*---------*/
5391   /*  post receives of a-array */
5392   for (i=0; i<nrecvs; i++){
5393     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5394     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5395   }
5396 
5397   /* pack the outgoing message a-array */
5398   k = 0;
5399   for (i=0; i<nsends; i++){
5400     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5401     bufA = bufa+sstartsj[i];
5402     for (j=0; j<nrows; j++) {
5403       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5404       for (ll=0; ll<sbs; ll++){
5405         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5406         for (l=0; l<ncols; l++){
5407           *bufA++ = vals[l];
5408         }
5409         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5410       }
5411     }
5412     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5413   }
5414   /* recvs and sends of a-array are completed */
5415   i = nrecvs;
5416   while (i--) {
5417     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5418   }
5419   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5420   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5421 
5422   if (scall == MAT_INITIAL_MATRIX){
5423     /* put together the new matrix */
5424     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5425 
5426     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5427     /* Since these are PETSc arrays, change flags to free them as necessary. */
5428     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5429     b_oth->free_a  = PETSC_TRUE;
5430     b_oth->free_ij = PETSC_TRUE;
5431     b_oth->nonew   = 0;
5432 
5433     ierr = PetscFree(bufj);CHKERRQ(ierr);
5434     if (!startsj_s || !bufa_ptr){
5435       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5436       ierr = PetscFree(bufa);CHKERRQ(ierr);
5437     } else {
5438       *startsj_s = sstartsj;
5439       *startsj_r = rstartsj;
5440       *bufa_ptr  = bufa;
5441     }
5442   }
5443   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5444   PetscFunctionReturn(0);
5445 }
5446 
5447 #undef __FUNCT__
5448 #define __FUNCT__ "MatGetCommunicationStructs"
5449 /*@C
5450   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5451 
5452   Not Collective
5453 
5454   Input Parameters:
5455 . A - The matrix in mpiaij format
5456 
5457   Output Parameters:
5458 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5459 . colmap - A map from global column index to local index into lvec
5460 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5461 
5462   Level: developer
5463 
5464 @*/
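/*
   A hedged usage sketch (hypothetical caller code): all three output pointers must be valid,
   since the routine validates each of them, even if some of the results are not needed.
   Depending on PETSC_USE_CTABLE, colmap is a PetscTable or a PetscInt array.

     #if defined (PETSC_USE_CTABLE)
       PetscTable colmap;
     #else
       PetscInt   *colmap;
     #endif
       Vec        lvec;
       VecScatter Mvctx;

       ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/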
5465 #if defined (PETSC_USE_CTABLE)
5466 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5467 #else
5468 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5469 #endif
5470 {
5471   Mat_MPIAIJ *a;
5472 
5473   PetscFunctionBegin;
5474   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5475   PetscValidPointer(lvec, 2);
5476   PetscValidPointer(colmap, 3);
5477   PetscValidPointer(multScatter, 4);
5478   a = (Mat_MPIAIJ *) A->data;
5479   if (lvec) *lvec = a->lvec;
5480   if (colmap) *colmap = a->colmap;
5481   if (multScatter) *multScatter = a->Mvctx;
5482   PetscFunctionReturn(0);
5483 }
5484 
5485 EXTERN_C_BEGIN
5486 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5487 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5488 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5489 EXTERN_C_END
5490 
5491 #undef __FUNCT__
5492 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5493 /*
5494     Computes C = A*B as (B'*A')', since computing the MPIDense*MPIAIJ product A*B directly is untenable
5495 
5496                n                       p                          p
5497         (              )       (              )         (                  )
5498       m (      A       )  *  n (       B      )   =   m (         C        )
5499         (              )       (              )         (                  )
5500 
5501 */
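/*
   In symbols: with A (MPIDENSE) of size m x n and B (MPIAIJ) of size n x p, the routine forms
   C = A*B as C = (B^T * A^T)^T, so that the product actually computed, MatMatMult(Bt,At,...),
   is an AIJ-times-Dense product, for which an implementation exists.
*/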
5502 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5503 {
5504   PetscErrorCode     ierr;
5505   Mat                At,Bt,Ct;
5506 
5507   PetscFunctionBegin;
5508   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5509   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5510   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5511   ierr = MatDestroy(&At);CHKERRQ(ierr);
5512   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5513   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5514   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5515   PetscFunctionReturn(0);
5516 }
5517 
5518 #undef __FUNCT__
5519 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5520 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5521 {
5522   PetscErrorCode ierr;
5523   PetscInt       m=A->rmap->n,n=B->cmap->n;
5524   Mat            Cmat;
5525 
5526   PetscFunctionBegin;
5527   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5528   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5529   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5530   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5531   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5532   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5533   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5534   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5535   *C   = Cmat;
5536   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5537   PetscFunctionReturn(0);
5538 }
5539 
5540 /* ----------------------------------------------------------------*/
5541 #undef __FUNCT__
5542 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5543 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5544 {
5545   PetscErrorCode ierr;
5546 
5547   PetscFunctionBegin;
5548   if (scall == MAT_INITIAL_MATRIX){
5549     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5550   }
5551   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5552   PetscFunctionReturn(0);
5553 }
5554 
5555 EXTERN_C_BEGIN
5556 #if defined(PETSC_HAVE_MUMPS)
5557 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5558 #endif
5559 #if defined(PETSC_HAVE_PASTIX)
5560 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5561 #endif
5562 #if defined(PETSC_HAVE_SUPERLU_DIST)
5563 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5564 #endif
5565 #if defined(PETSC_HAVE_CLIQUE)
5566 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5567 #endif
5568 EXTERN_C_END
5569 
5570 /*MC
5571    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5572 
5573    Options Database Keys:
5574 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5575 
5576   Level: beginner
5577 
5578 .seealso: MatCreateAIJ()
5579 M*/
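/*
   A brief usage sketch (hypothetical caller code): the type can also be selected programmatically
   instead of through -mat_type mpiaij.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/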
5580 
5581 EXTERN_C_BEGIN
5582 #undef __FUNCT__
5583 #define __FUNCT__ "MatCreate_MPIAIJ"
5584 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5585 {
5586   Mat_MPIAIJ     *b;
5587   PetscErrorCode ierr;
5588   PetscMPIInt    size;
5589 
5590   PetscFunctionBegin;
5591   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5592   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5593   B->data         = (void*)b;
5594   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5595   B->assembled    = PETSC_FALSE;
5596   B->insertmode   = NOT_SET_VALUES;
5597   b->size         = size;
5598   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5599 
5600   /* build cache for off array entries formed */
5601   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5602   b->donotstash  = PETSC_FALSE;
5603   b->colmap      = 0;
5604   b->garray      = 0;
5605   b->roworiented = PETSC_TRUE;
5606 
5607   /* stuff used for matrix vector multiply */
5608   b->lvec      = PETSC_NULL;
5609   b->Mvctx     = PETSC_NULL;
5610 
5611   /* stuff for MatGetRow() */
5612   b->rowindices   = 0;
5613   b->rowvalues    = 0;
5614   b->getrowactive = PETSC_FALSE;
5615 
5616   /* flexible pointer used in CUSP/CUSPARSE classes */
5617   b->spptr        = PETSC_NULL;
5618 
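  /* Register external-package factorizations and type conversions by name; MatGetFactor() and
     MatConvert() look these up at runtime with PetscObjectQueryFunction(). */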
5619 #if defined(PETSC_HAVE_MUMPS)
5620   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5621                                      "MatGetFactor_aij_mumps",
5622                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5623 #endif
5624 #if defined(PETSC_HAVE_PASTIX)
5625   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5626 					   "MatGetFactor_mpiaij_pastix",
5627 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5628 #endif
5629 #if defined(PETSC_HAVE_SUPERLU_DIST)
5630   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5631                                      "MatGetFactor_mpiaij_superlu_dist",
5632                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5633 #endif
5634 #if defined(PETSC_HAVE_CLIQUE)
5635   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5636                                      "MatGetFactor_aij_clique",
5637                                      MatGetFactor_aij_clique);CHKERRQ(ierr);
5638 #endif
5639   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5640                                      "MatStoreValues_MPIAIJ",
5641                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5642   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5643                                      "MatRetrieveValues_MPIAIJ",
5644                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5645   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5646 				     "MatGetDiagonalBlock_MPIAIJ",
5647                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5648   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5649 				     "MatIsTranspose_MPIAIJ",
5650 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5651   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5652 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5653 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5654   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5655 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5656 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5657   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5658 				     "MatDiagonalScaleLocal_MPIAIJ",
5659 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5660   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5661                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5662                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5663   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5664                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5665                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5666   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5667                                      "MatConvert_MPIAIJ_MPISBAIJ",
5668                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5669   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5670                                      "MatMatMult_MPIDense_MPIAIJ",
5671                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5672   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5673                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5674                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5675   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5676                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5677                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5678   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5679   PetscFunctionReturn(0);
5680 }
5681 EXTERN_C_END
5682 
5683 #undef __FUNCT__
5684 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5685 /*@
5686      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5687          and "off-diagonal" part of the matrix in CSR format.
5688 
5689    Collective on MPI_Comm
5690 
5691    Input Parameters:
5692 +  comm - MPI communicator
5693 .  m - number of local rows (Cannot be PETSC_DECIDE)
5694 .  n - This value should be the same as the local size used in creating the
5695        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5696        calculated if N is given). For square matrices n is almost always m.
5697 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5698 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5699 .   i - row indices for "diagonal" portion of matrix
5700 .   j - column indices
5701 .   a - matrix values
5702 .   oi - row indices for "off-diagonal" portion of matrix
5703 .   oj - column indices
5704 -   oa - matrix values
5705 
5706    Output Parameter:
5707 .   mat - the matrix
5708 
5709    Level: advanced
5710 
5711    Notes:
5712        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5713        must free the arrays once the matrix has been destroyed and not before.
5714 
5715        The i and j indices are 0 based
5716 
5717        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5718 
5719        This sets local rows and cannot be used to set off-processor values.
5720 
5721        You cannot later use MatSetValues() to change values in this matrix.
5722 
5723 .keywords: matrix, aij, compressed row, sparse, parallel
5724 
5725 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5726           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5727 @*/
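/*
   A hedged example (hypothetical, not from the PETSc test suite): on two processes, build the
   2x2 matrix with 2 on the diagonal and -1 off it, one row per process.  Note that the
   off-diagonal column indices are global here, since the "off-diagonal" block is created below
   with the full global column width; the arrays must remain valid until the matrix is destroyed.

     PetscInt    i[2]  = {0,1}, j[1]  = {0};    index arrays for the "diagonal" part
     PetscScalar a[1]  = {2.0};
     PetscInt    oi[2] = {0,1}, oj[1];          index arrays for the "off-diagonal" part
     PetscScalar oa[1] = {-1.0};
     Mat         A;
     PetscMPIInt rank;

     ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     oj[0] = 1 - rank;                          the single off-process column
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/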
5728 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5729 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5730 {
5731   PetscErrorCode ierr;
5732   Mat_MPIAIJ     *maij;
5733 
5734   PetscFunctionBegin;
5735   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5736   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5737   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5738   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5739   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5740   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5741   maij = (Mat_MPIAIJ*) (*mat)->data;
5742   maij->donotstash     = PETSC_TRUE;
5743   (*mat)->preallocated = PETSC_TRUE;
5744 
5745   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5746   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5747 
5748   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5749   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5750 
5751   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5752   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5753   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5754   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5755 
5756   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5757   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5758   PetscFunctionReturn(0);
5759 }
5760 
5761 /*
5762     Special version for direct calls from Fortran
5763 */
5764 #include <petsc-private/fortranimpl.h>
5765 
5766 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5767 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5768 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5769 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5770 #endif
5771 
5772 /* Change these macros so can be used in void function */
5773 #undef CHKERRQ
5774 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5775 #undef SETERRQ2
5776 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5777 #undef SETERRQ3
5778 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5779 #undef SETERRQ
5780 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5781 
5782 EXTERN_C_BEGIN
5783 #undef __FUNCT__
5784 #define __FUNCT__ "matsetvaluesmpiaij_"
5785 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5786 {
5787   Mat             mat = *mmat;
5788   PetscInt        m = *mm, n = *mn;
5789   InsertMode      addv = *maddv;
5790   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5791   PetscScalar     value;
5792   PetscErrorCode  ierr;
5793 
5794   MatCheckPreallocated(mat,1);
5795   if (mat->insertmode == NOT_SET_VALUES) {
5796     mat->insertmode = addv;
5797   }
5798 #if defined(PETSC_USE_DEBUG)
5799   else if (mat->insertmode != addv) {
5800     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5801   }
5802 #endif
5803   {
5804   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5805   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5806   PetscBool       roworiented = aij->roworiented;
5807 
5808   /* Some Variables required in the macro */
5809   Mat             A = aij->A;
5810   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5811   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5812   MatScalar       *aa = a->a;
5813   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5814   Mat             B = aij->B;
5815   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5816   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5817   MatScalar       *ba = b->a;
5818 
5819   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5820   PetscInt        nonew = a->nonew;
5821   MatScalar       *ap1,*ap2;
5822 
5823   PetscFunctionBegin;
5824   for (i=0; i<m; i++) {
5825     if (im[i] < 0) continue;
5826 #if defined(PETSC_USE_DEBUG)
5827     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5828 #endif
5829     if (im[i] >= rstart && im[i] < rend) {
5830       row      = im[i] - rstart;
5831       lastcol1 = -1;
5832       rp1      = aj + ai[row];
5833       ap1      = aa + ai[row];
5834       rmax1    = aimax[row];
5835       nrow1    = ailen[row];
5836       low1     = 0;
5837       high1    = nrow1;
5838       lastcol2 = -1;
5839       rp2      = bj + bi[row];
5840       ap2      = ba + bi[row];
5841       rmax2    = bimax[row];
5842       nrow2    = bilen[row];
5843       low2     = 0;
5844       high2    = nrow2;
5845 
5846       for (j=0; j<n; j++) {
5847         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5848         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5849         if (in[j] >= cstart && in[j] < cend){
5850           col = in[j] - cstart;
5851           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5852         } else if (in[j] < 0) continue;
5853 #if defined(PETSC_USE_DEBUG)
5854         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5855 #endif
5856         else {
5857           if (mat->was_assembled) {
5858             if (!aij->colmap) {
5859               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5860             }
5861 #if defined (PETSC_USE_CTABLE)
5862             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5863 	    col--;
5864 #else
5865             col = aij->colmap[in[j]] - 1;
5866 #endif
5867             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5868               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5869               col =  in[j];
5870               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5871               B = aij->B;
5872               b = (Mat_SeqAIJ*)B->data;
5873               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5874               ba       = b->a; /* b->a was reallocated by MatDisAssemble_MPIAIJ(), so refresh ba before computing ap2 */
5875               rp2      = bj + bi[row];
5876               ap2      = ba + bi[row];
5877               rmax2    = bimax[row];
5878               nrow2    = bilen[row];
5879               low2     = 0;
5880               high2    = nrow2;
5881               bm       = aij->B->rmap->n;
5882             }
5883           } else col = in[j];
5884           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5885         }
5886       }
5887     } else {
5888       if (!aij->donotstash) {
5889         if (roworiented) {
5890           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5891         } else {
5892           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5893         }
5894       }
5895     }
5896   }}
5897   PetscFunctionReturnVoid();
5898 }
5899 EXTERN_C_END
5900 
5901