xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 0e9bae810fdaeb60e2713eaa8ddb89f42e079fd1)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 
178     Only for square matrices
179 */
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     if (!rank) {
204       bses[0] = gmat->rmap->bs;
205       bses[1] = gmat->cmap->bs;
206     }
207     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
208     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
209     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
210     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
211     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
212     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) {
215       rowners[i] += rowners[i-1];
216     }
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine number diagonal and off-diagonal counts */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
231       jj = 0;
232       for (i=0; i<m; i++) {
233 	for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236 	  jj++;
237 	}
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine number diagonal and off-diagonal counts */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj = 0;
266       for (i=0; i<m; i++) {
267 	for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270 	  jj++;
271 	}
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt  = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299     *inmat = mat;
300   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
301     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
302     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
303     mat   = *inmat;
304     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
305     if (!rank) {
306       /* send numerical values to other processes */
307       gmata = (Mat_SeqAIJ*) gmat->data;
308       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
309       gmataa = gmata->a;
310       for (i=1; i<size; i++) {
311         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
312         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
313       }
314       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
315     } else {
316       /* receive numerical values from process 0*/
317       nz   = Ad->nz + Ao->nz;
318       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
319       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
320     }
321     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
322     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
323     ad = Ad->a;
324     ao = Ao->a;
325     if (mat->rmap->n) {
326       i  = 0;
327       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
328       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
329     }
330     for (i=1; i<mat->rmap->n; i++) {
331       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     i--;
335     if (mat->rmap->n) {
336       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
337     }
338     if (rank) {
339       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
340     }
341   }
342   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   CHKMEMQ;
345   PetscFunctionReturn(0);
346 }
347 
348 /*
349   Local utility routine that creates a mapping from the global column
350 number to the local number in the off-diagonal part of the local
351 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
352 a slightly higher hash table cost; without it it is not scalable (each processor
353 has an order N integer array but is fast to acess.
354 */
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined (PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++){
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
373   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
374   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
375 #endif
376   PetscFunctionReturn(0);
377 }
378 
379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
380 { \
381     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
382     lastcol1 = col;\
383     while (high1-low1 > 5) { \
384       t = (low1+high1)/2; \
385       if (rp1[t] > col) high1 = t; \
386       else             low1  = t; \
387     } \
388       for (_i=low1; _i<high1; _i++) { \
389         if (rp1[_i] > col) break; \
390         if (rp1[_i] == col) { \
391           if (addv == ADD_VALUES) ap1[_i] += value;   \
392           else                    ap1[_i] = value; \
393           goto a_noinsert; \
394         } \
395       }  \
396       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
397       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
399       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
400       N = nrow1++ - 1; a->nz++; high1++; \
401       /* shift up all the later entries in this row */ \
402       for (ii=N; ii>=_i; ii--) { \
403         rp1[ii+1] = rp1[ii]; \
404         ap1[ii+1] = ap1[ii]; \
405       } \
406       rp1[_i] = col;  \
407       ap1[_i] = value;  \
408       a_noinsert: ; \
409       ailen[row] = nrow1; \
410 }
411 
412 
413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
414 { \
415     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
416     lastcol2 = col;\
417     while (high2-low2 > 5) { \
418       t = (low2+high2)/2; \
419       if (rp2[t] > col) high2 = t; \
420       else             low2  = t; \
421     } \
422     for (_i=low2; _i<high2; _i++) {		\
423       if (rp2[_i] > col) break;			\
424       if (rp2[_i] == col) {			      \
425 	if (addv == ADD_VALUES) ap2[_i] += value;     \
426 	else                    ap2[_i] = value;      \
427 	goto b_noinsert;			      \
428       }						      \
429     }							      \
430     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
431     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
432     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
433     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
434     N = nrow2++ - 1; b->nz++; high2++;					\
435     /* shift up all the later entries in this row */			\
436     for (ii=N; ii>=_i; ii--) {						\
437       rp2[ii+1] = rp2[ii];						\
438       ap2[ii+1] = ap2[ii];						\
439     }									\
440     rp2[_i] = col;							\
441     ap2[_i] = value;							\
442     b_noinsert: ;								\
443     bilen[row] = nrow2;							\
444 }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat            A = aij->A;
487   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
488   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar      *aa = a->a;
490   PetscBool      ignorezeroentries = a->ignorezeroentries;
491   Mat            B = aij->B;
492   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
493   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar      *ba = b->a;
495 
496   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt       nonew;
498   MatScalar      *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   if (v) PetscValidScalarPointer(v,6);
502   for (i=0; i<m; i++) {
503     if (im[i] < 0) continue;
504 #if defined(PETSC_USE_DEBUG)
505     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
506 #endif
507     if (im[i] >= rstart && im[i] < rend) {
508       row      = im[i] - rstart;
509       lastcol1 = -1;
510       rp1      = aj + ai[row];
511       ap1      = aa + ai[row];
512       rmax1    = aimax[row];
513       nrow1    = ailen[row];
514       low1     = 0;
515       high1    = nrow1;
516       lastcol2 = -1;
517       rp2      = bj + bi[row];
518       ap2      = ba + bi[row];
519       rmax2    = bimax[row];
520       nrow2    = bilen[row];
521       low2     = 0;
522       high2    = nrow2;
523 
524       for (j=0; j<n; j++) {
525         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend){
528           col = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined (PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542 	    col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B = aij->B;
551               b = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2      = bj + bi[row];
554               ap2      = ba + bi[row];
555               rmax2    = bimax[row];
556               nrow2    = bilen[row];
557               low2     = 0;
558               high2    = nrow2;
559               bm       = aij->B->rmap->n;
560               ba = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend){
601           col = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined (PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col --;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else {
620       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
621     }
622   }
623   PetscFunctionReturn(0);
624 }
625 
626 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
630 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
631 {
632   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
633   PetscErrorCode ierr;
634   PetscInt       nstash,reallocs;
635   InsertMode     addv;
636 
637   PetscFunctionBegin;
638   if (aij->donotstash || mat->nooffprocentries) {
639     PetscFunctionReturn(0);
640   }
641 
642   /* make sure all processors are either in INSERTMODE or ADDMODE */
643   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
644   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
645   mat->insertmode = addv; /* in case this processor had no cache */
646 
647   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
648   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
649   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
656 {
657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
658   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
659   PetscErrorCode ierr;
660   PetscMPIInt    n;
661   PetscInt       i,j,rstart,ncols,flg;
662   PetscInt       *row,*col;
663   PetscBool      other_disassembled;
664   PetscScalar    *val;
665   InsertMode     addv = mat->insertmode;
666 
667   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
668   PetscFunctionBegin;
669   if (!aij->donotstash && !mat->nooffprocentries) {
670     while (1) {
671       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
672       if (!flg) break;
673 
674       for (i=0; i<n;) {
675         /* Now identify the consecutive vals belonging to the same row */
676         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
677         if (j < n) ncols = j-i;
678         else       ncols = n-i;
679         /* Now assemble all these values with a single function call */
680         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled, if so we must
690      also disassemble ourselfs, in order that we may reassemble. */
691   /*
692      if nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled thus we can skip this stuff
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
706   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
707   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
708 
709   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNCT__
722 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
723 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
724 {
725   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
726   PetscErrorCode ierr;
727 
728   PetscFunctionBegin;
729   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
730   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
731   PetscFunctionReturn(0);
732 }
733 
734 #undef __FUNCT__
735 #define __FUNCT__ "MatZeroRows_MPIAIJ"
736 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
737 {
738   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
739   PetscErrorCode    ierr;
740   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
741   PetscInt          i,*owners = A->rmap->range;
742   PetscInt          *nprocs,j,idx,nsends,row;
743   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
744   PetscInt          *rvalues,count,base,slen,*source;
745   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
746   MPI_Comm          comm = ((PetscObject)A)->comm;
747   MPI_Request       *send_waits,*recv_waits;
748   MPI_Status        recv_status,*send_status;
749   const PetscScalar *xx;
750   PetscScalar       *bb;
751 #if defined(PETSC_DEBUG)
752   PetscBool      found = PETSC_FALSE;
753 #endif
754 
755   PetscFunctionBegin;
756   /*  first count number of contributors to each processor */
757   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
758   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
759   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
760   j = 0;
761   for (i=0; i<N; i++) {
762     if (lastidx > (idx = rows[i])) j = 0;
763     lastidx = idx;
764     for (; j<size; j++) {
765       if (idx >= owners[j] && idx < owners[j+1]) {
766         nprocs[2*j]++;
767         nprocs[2*j+1] = 1;
768         owner[i] = j;
769 #if defined(PETSC_DEBUG)
770         found = PETSC_TRUE;
771 #endif
772         break;
773       }
774     }
775 #if defined(PETSC_DEBUG)
776     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
777     found = PETSC_FALSE;
778 #endif
779   }
780   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
781 
782   if (A->nooffproczerorows) {
783     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
784     nrecvs = nsends;
785     nmax   = N;
786   } else {
787     /* inform other processors of number of messages and max length*/
788     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
789   }
790 
791   /* post receives:   */
792   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
793   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
794   for (i=0; i<nrecvs; i++) {
795     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
796   }
797 
798   /* do sends:
799       1) starts[i] gives the starting index in svalues for stuff going to
800          the ith processor
801   */
802   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
803   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
804   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
805   starts[0] = 0;
806   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
807   for (i=0; i<N; i++) {
808     svalues[starts[owner[i]]++] = rows[i];
809   }
810 
811   starts[0] = 0;
812   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
813   count = 0;
814   for (i=0; i<size; i++) {
815     if (nprocs[2*i+1]) {
816       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
817     }
818   }
819   ierr = PetscFree(starts);CHKERRQ(ierr);
820 
821   base = owners[rank];
822 
823   /*  wait on receives */
824   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
825   count  = nrecvs; slen = 0;
826   while (count) {
827     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
828     /* unpack receives into our local space */
829     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
830     source[imdex]  = recv_status.MPI_SOURCE;
831     lens[imdex]    = n;
832     slen          += n;
833     count--;
834   }
835   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
836 
837   /* move the data into the send scatter */
838   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
839   count = 0;
840   for (i=0; i<nrecvs; i++) {
841     values = rvalues + i*nmax;
842     for (j=0; j<lens[i]; j++) {
843       lrows[count++] = values[j] - base;
844     }
845   }
846   ierr = PetscFree(rvalues);CHKERRQ(ierr);
847   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
848   ierr = PetscFree(owner);CHKERRQ(ierr);
849   ierr = PetscFree(nprocs);CHKERRQ(ierr);
850 
851   /* fix right hand side if needed */
852   if (x && b) {
853     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
854     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
855     for (i=0; i<slen; i++) {
856       bb[lrows[i]] = diag*xx[lrows[i]];
857     }
858     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
859     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
860   }
861   /*
862         Zero the required rows. If the "diagonal block" of the matrix
863      is square and the user wishes to set the diagonal we use separate
864      code so that MatSetValues() is not called for each diagonal allocating
865      new memory, thus calling lots of mallocs and slowing things down.
866 
867   */
868   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
869   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
870   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
871     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
872   } else if (diag != 0.0) {
873     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
874     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
875       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
876 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
877     }
878     for (i = 0; i < slen; i++) {
879       row  = lrows[i] + rstart;
880       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
881     }
882     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
883     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
884   } else {
885     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
886   }
887   ierr = PetscFree(lrows);CHKERRQ(ierr);
888 
889   /* wait on sends */
890   if (nsends) {
891     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
892     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
893     ierr = PetscFree(send_status);CHKERRQ(ierr);
894   }
895   ierr = PetscFree(send_waits);CHKERRQ(ierr);
896   ierr = PetscFree(svalues);CHKERRQ(ierr);
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
902 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
903 {
904   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
905   PetscErrorCode    ierr;
906   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
907   PetscInt          i,*owners = A->rmap->range;
908   PetscInt          *nprocs,j,idx,nsends;
909   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
910   PetscInt          *rvalues,count,base,slen,*source;
911   PetscInt          *lens,*lrows,*values,m;
912   MPI_Comm          comm = ((PetscObject)A)->comm;
913   MPI_Request       *send_waits,*recv_waits;
914   MPI_Status        recv_status,*send_status;
915   const PetscScalar *xx;
916   PetscScalar       *bb,*mask;
917   Vec               xmask,lmask;
918   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
919   const PetscInt    *aj, *ii,*ridx;
920   PetscScalar       *aa;
921 #if defined(PETSC_DEBUG)
922   PetscBool         found = PETSC_FALSE;
923 #endif
924 
925   PetscFunctionBegin;
926   /*  first count number of contributors to each processor */
927   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
928   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
929   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
930   j = 0;
931   for (i=0; i<N; i++) {
932     if (lastidx > (idx = rows[i])) j = 0;
933     lastidx = idx;
934     for (; j<size; j++) {
935       if (idx >= owners[j] && idx < owners[j+1]) {
936         nprocs[2*j]++;
937         nprocs[2*j+1] = 1;
938         owner[i] = j;
939 #if defined(PETSC_DEBUG)
940         found = PETSC_TRUE;
941 #endif
942         break;
943       }
944     }
945 #if defined(PETSC_DEBUG)
946     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
947     found = PETSC_FALSE;
948 #endif
949   }
950   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
951 
952   /* inform other processors of number of messages and max length*/
953   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
954 
955   /* post receives:   */
956   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
957   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
958   for (i=0; i<nrecvs; i++) {
959     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
960   }
961 
962   /* do sends:
963       1) starts[i] gives the starting index in svalues for stuff going to
964          the ith processor
965   */
966   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
967   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
968   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
969   starts[0] = 0;
970   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
971   for (i=0; i<N; i++) {
972     svalues[starts[owner[i]]++] = rows[i];
973   }
974 
975   starts[0] = 0;
976   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
977   count = 0;
978   for (i=0; i<size; i++) {
979     if (nprocs[2*i+1]) {
980       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
981     }
982   }
983   ierr = PetscFree(starts);CHKERRQ(ierr);
984 
985   base = owners[rank];
986 
987   /*  wait on receives */
988   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
989   count  = nrecvs; slen = 0;
990   while (count) {
991     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
992     /* unpack receives into our local space */
993     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
994     source[imdex]  = recv_status.MPI_SOURCE;
995     lens[imdex]    = n;
996     slen          += n;
997     count--;
998   }
999   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1000 
1001   /* move the data into the send scatter */
1002   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1003   count = 0;
1004   for (i=0; i<nrecvs; i++) {
1005     values = rvalues + i*nmax;
1006     for (j=0; j<lens[i]; j++) {
1007       lrows[count++] = values[j] - base;
1008     }
1009   }
1010   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1011   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1012   ierr = PetscFree(owner);CHKERRQ(ierr);
1013   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1014   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1015 
1016   /* zero diagonal part of matrix */
1017   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1018 
1019   /* handle off diagonal part of matrix */
1020   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1021   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1022   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1023   for (i=0; i<slen; i++) {
1024     bb[lrows[i]] = 1;
1025   }
1026   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1027   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1030   if (x) {
1031     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1034     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1035   }
1036   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1037 
1038   /* remove zeroed rows of off diagonal matrix */
1039   ii = aij->i;
1040   for (i=0; i<slen; i++) {
1041     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1042   }
1043 
1044   /* loop over all elements of off process part of matrix zeroing removed columns*/
1045   if (aij->compressedrow.use){
1046     m    = aij->compressedrow.nrows;
1047     ii   = aij->compressedrow.i;
1048     ridx = aij->compressedrow.rindex;
1049     for (i=0; i<m; i++){
1050       n   = ii[i+1] - ii[i];
1051       aj  = aij->j + ii[i];
1052       aa  = aij->a + ii[i];
1053 
1054       for (j=0; j<n; j++) {
1055         if (PetscAbsScalar(mask[*aj])) {
1056           if (b) bb[*ridx] -= *aa*xx[*aj];
1057           *aa        = 0.0;
1058         }
1059         aa++;
1060         aj++;
1061       }
1062       ridx++;
1063     }
1064   } else { /* do not use compressed row format */
1065     m = l->B->rmap->n;
1066     for (i=0; i<m; i++) {
1067       n   = ii[i+1] - ii[i];
1068       aj  = aij->j + ii[i];
1069       aa  = aij->a + ii[i];
1070       for (j=0; j<n; j++) {
1071         if (PetscAbsScalar(mask[*aj])) {
1072           if (b) bb[i] -= *aa*xx[*aj];
1073           *aa    = 0.0;
1074         }
1075         aa++;
1076         aj++;
1077       }
1078     }
1079   }
1080   if (x) {
1081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1082     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1083   }
1084   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1085   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1086   ierr = PetscFree(lrows);CHKERRQ(ierr);
1087 
1088   /* wait on sends */
1089   if (nsends) {
1090     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1091     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1092     ierr = PetscFree(send_status);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1095   ierr = PetscFree(svalues);CHKERRQ(ierr);
1096 
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added in yy until the next line, */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 EXTERN_C_BEGIN
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1179 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1180 {
1181   MPI_Comm       comm;
1182   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1183   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1184   IS             Me,Notme;
1185   PetscErrorCode ierr;
1186   PetscInt       M,N,first,last,*notme,i;
1187   PetscMPIInt    size;
1188 
1189   PetscFunctionBegin;
1190 
1191   /* Easy test: symmetric diagonal block */
1192   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1193   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1194   if (!*f) PetscFunctionReturn(0);
1195   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1196   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1197   if (size == 1) PetscFunctionReturn(0);
1198 
1199   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1200   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1201   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1202   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1203   for (i=0; i<first; i++) notme[i] = i;
1204   for (i=last; i<M; i++) notme[i-last+first] = i;
1205   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1206   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1207   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1208   Aoff = Aoffs[0];
1209   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1210   Boff = Boffs[0];
1211   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1212   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1213   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1214   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1215   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1216   ierr = PetscFree(notme);CHKERRQ(ierr);
1217    PetscFunctionReturn(0);
1218 }
1219 EXTERN_C_END
1220 
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1223 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1224 {
1225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1226   PetscErrorCode ierr;
1227 
1228   PetscFunctionBegin;
1229   /* do nondiagonal part */
1230   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1231   /* send it on its way */
1232   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1233   /* do local part */
1234   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1235   /* receive remote parts */
1236   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 /*
1241   This only works correctly for square matrices where the subblock A->A is the
1242    diagonal block
1243 */
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1246 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1247 {
1248   PetscErrorCode ierr;
1249   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1250 
1251   PetscFunctionBegin;
1252   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1253   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1254   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatScale_MPIAIJ"
1260 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1261 {
1262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1267   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatDestroy_MPIAIJ"
1273 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1274 {
1275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1276   PetscErrorCode ierr;
1277 
1278   PetscFunctionBegin;
1279 #if defined(PETSC_USE_LOG)
1280   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1281 #endif
1282   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1283   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1284   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1285   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1286 #if defined (PETSC_USE_CTABLE)
1287   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1288 #else
1289   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1290 #endif
1291   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1292   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1293   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1294   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1295   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1296   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1312 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1313 {
1314   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1315   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1316   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1317   PetscErrorCode    ierr;
1318   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1319   int               fd;
1320   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1321   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1322   PetscScalar       *column_values;
1323   PetscInt          message_count,flowcontrolcount;
1324   FILE              *file;
1325 
1326   PetscFunctionBegin;
1327   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1328   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1329   nz   = A->nz + B->nz;
1330   if (!rank) {
1331     header[0] = MAT_FILE_CLASSID;
1332     header[1] = mat->rmap->N;
1333     header[2] = mat->cmap->N;
1334     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1335     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1336     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1337     /* get largest number of rows any processor has */
1338     rlen = mat->rmap->n;
1339     range = mat->rmap->range;
1340     for (i=1; i<size; i++) {
1341       rlen = PetscMax(rlen,range[i+1] - range[i]);
1342     }
1343   } else {
1344     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1345     rlen = mat->rmap->n;
1346   }
1347 
1348   /* load up the local row counts */
1349   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1350   for (i=0; i<mat->rmap->n; i++) {
1351     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1352   }
1353 
1354   /* store the row lengths to the file */
1355   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1356   if (!rank) {
1357     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1358     for (i=1; i<size; i++) {
1359       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1360       rlen = range[i+1] - range[i];
1361       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1362       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1363     }
1364     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1365   } else {
1366     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1367     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1368     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1369   }
1370   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1371 
1372   /* load up the local column indices */
1373   nzmax = nz; /* )th processor needs space a largest processor needs */
1374   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1375   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1376   cnt  = 0;
1377   for (i=0; i<mat->rmap->n; i++) {
1378     for (j=B->i[i]; j<B->i[i+1]; j++) {
1379       if ( (col = garray[B->j[j]]) > cstart) break;
1380       column_indices[cnt++] = col;
1381     }
1382     for (k=A->i[i]; k<A->i[i+1]; k++) {
1383       column_indices[cnt++] = A->j[k] + cstart;
1384     }
1385     for (; j<B->i[i+1]; j++) {
1386       column_indices[cnt++] = garray[B->j[j]];
1387     }
1388   }
1389   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1390 
1391   /* store the column indices to the file */
1392    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1393   if (!rank) {
1394     MPI_Status status;
1395     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1396     for (i=1; i<size; i++) {
1397       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1398       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1399       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1400       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1401       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1402     }
1403      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1404   } else {
1405     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1406     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1407     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1408     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1409   }
1410   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1411 
1412   /* load up the local column values */
1413   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1414   cnt  = 0;
1415   for (i=0; i<mat->rmap->n; i++) {
1416     for (j=B->i[i]; j<B->i[i+1]; j++) {
1417       if ( garray[B->j[j]] > cstart) break;
1418       column_values[cnt++] = B->a[j];
1419     }
1420     for (k=A->i[i]; k<A->i[i+1]; k++) {
1421       column_values[cnt++] = A->a[k];
1422     }
1423     for (; j<B->i[i+1]; j++) {
1424       column_values[cnt++] = B->a[j];
1425     }
1426   }
1427   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1428 
1429   /* store the column values to the file */
1430    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1431   if (!rank) {
1432     MPI_Status status;
1433     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1434     for (i=1; i<size; i++) {
1435        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1436       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1437       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1438       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1439       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1440     }
1441     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1442   } else {
1443     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1444     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1445     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1446     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1447   }
1448   ierr = PetscFree(column_values);CHKERRQ(ierr);
1449 
1450   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1451   if (file) {
1452     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1453   }
1454 
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1460 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1461 {
1462   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1463   PetscErrorCode    ierr;
1464   PetscMPIInt       rank = aij->rank,size = aij->size;
1465   PetscBool         isdraw,iascii,isbinary;
1466   PetscViewer       sviewer;
1467   PetscViewerFormat format;
1468 
1469   PetscFunctionBegin;
1470   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1471   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1472   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1473   if (iascii) {
1474     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1475     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1476       MatInfo    info;
1477       PetscBool  inodes;
1478 
1479       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1480       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1481       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1482       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1483       if (!inodes) {
1484         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1485 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1486       } else {
1487         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1488 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1489       }
1490       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1491       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1492       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1493       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1494       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1495       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1496       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1497       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1498       PetscFunctionReturn(0);
1499     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1500       PetscInt   inodecount,inodelimit,*inodes;
1501       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1502       if (inodes) {
1503         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1504       } else {
1505         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1506       }
1507       PetscFunctionReturn(0);
1508     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1509       PetscFunctionReturn(0);
1510     }
1511   } else if (isbinary) {
1512     if (size == 1) {
1513       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1514       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1515     } else {
1516       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1517     }
1518     PetscFunctionReturn(0);
1519   } else if (isdraw) {
1520     PetscDraw  draw;
1521     PetscBool  isnull;
1522     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1523     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1524   }
1525 
1526   if (size == 1) {
1527     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1528     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1529   } else {
1530     /* assemble the entire matrix onto first processor. */
1531     Mat         A;
1532     Mat_SeqAIJ  *Aloc;
1533     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1534     MatScalar   *a;
1535 
1536     if (mat->rmap->N > 1024) {
1537       PetscBool  flg = PETSC_FALSE;
1538 
1539       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1540       if (!flg) {
1541         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1542       }
1543     }
1544 
1545     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1546     if (!rank) {
1547       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1548     } else {
1549       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1550     }
1551     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1552     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1553     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1554     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1555     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1556 
1557     /* copy over the A part */
1558     Aloc = (Mat_SeqAIJ*)aij->A->data;
1559     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1560     row = mat->rmap->rstart;
1561     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1562     for (i=0; i<m; i++) {
1563       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1564       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1565     }
1566     aj = Aloc->j;
1567     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1568 
1569     /* copy over the B part */
1570     Aloc = (Mat_SeqAIJ*)aij->B->data;
1571     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1572     row  = mat->rmap->rstart;
1573     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1574     ct   = cols;
1575     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1576     for (i=0; i<m; i++) {
1577       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1578       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1579     }
1580     ierr = PetscFree(ct);CHKERRQ(ierr);
1581     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583     /*
1584        Everyone has to call to draw the matrix since the graphics waits are
1585        synchronized across all processors that share the PetscDraw object
1586     */
1587     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1588     if (!rank) {
1589       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1590       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1591       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1592       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1593     }
1594     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1595     ierr = MatDestroy(&A);CHKERRQ(ierr);
1596   }
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatView_MPIAIJ"
1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1603 {
1604   PetscErrorCode ierr;
1605   PetscBool      iascii,isdraw,issocket,isbinary;
1606 
1607   PetscFunctionBegin;
1608   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1609   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1610   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1611   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1612   if (iascii || isdraw || isbinary || issocket) {
1613     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1614   } else {
1615     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1616   }
1617   PetscFunctionReturn(0);
1618 }
1619 
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatSOR_MPIAIJ"
1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1623 {
1624   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1625   PetscErrorCode ierr;
1626   Vec            bb1 = 0;
1627   PetscBool      hasop;
1628 
1629   PetscFunctionBegin;
1630   if (flag == SOR_APPLY_UPPER) {
1631     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1632     PetscFunctionReturn(0);
1633   }
1634 
1635   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1636     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1637   }
1638 
1639   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1640     if (flag & SOR_ZERO_INITIAL_GUESS) {
1641       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1642       its--;
1643     }
1644 
1645     while (its--) {
1646       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1647       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1648 
1649       /* update rhs: bb1 = bb - B*x */
1650       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1651       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1652 
1653       /* local sweep */
1654       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1655     }
1656   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1657     if (flag & SOR_ZERO_INITIAL_GUESS) {
1658       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1659       its--;
1660     }
1661     while (its--) {
1662       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664 
1665       /* update rhs: bb1 = bb - B*x */
1666       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1667       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1668 
1669       /* local sweep */
1670       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1671     }
1672   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1673     if (flag & SOR_ZERO_INITIAL_GUESS) {
1674       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1675       its--;
1676     }
1677     while (its--) {
1678       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1679       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1680 
1681       /* update rhs: bb1 = bb - B*x */
1682       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1683       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1684 
1685       /* local sweep */
1686       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1687     }
1688   }  else if (flag & SOR_EISENSTAT) {
1689     Vec         xx1;
1690 
1691     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1692     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1693 
1694     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1695     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1696     if (!mat->diag) {
1697       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1698       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1699     }
1700     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1701     if (hasop) {
1702       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1703     } else {
1704       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1705     }
1706     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1707 
1708     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1709 
1710     /* local sweep */
1711     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1712     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1713     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1714   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1715 
1716   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatPermute_MPIAIJ"
1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1723 {
1724   MPI_Comm       comm;
1725   PetscInt       first,local_rowsize,local_colsize;
1726   const PetscInt *rows;
1727   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1728   PetscErrorCode ierr;
1729 
1730   PetscFunctionBegin;
1731   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1732   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1733   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1734   /* collect the global row permutation and invert it */
1735   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1736   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1737   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1738   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1739   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1740   /* get the local target indices */
1741   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1742   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1743   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1744   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1745   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1746   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1747   /* the column permutation is so much easier;
1748      make a local version of 'colp' and invert it */
1749   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1750   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1751   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1752   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1753   /* now we just get the submatrix */
1754   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1755   /* clean up */
1756   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1757   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1763 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1764 {
1765   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1766   Mat            A = mat->A,B = mat->B;
1767   PetscErrorCode ierr;
1768   PetscReal      isend[5],irecv[5];
1769 
1770   PetscFunctionBegin;
1771   info->block_size     = 1.0;
1772   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1773   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1774   isend[3] = info->memory;  isend[4] = info->mallocs;
1775   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1776   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1777   isend[3] += info->memory;  isend[4] += info->mallocs;
1778   if (flag == MAT_LOCAL) {
1779     info->nz_used      = isend[0];
1780     info->nz_allocated = isend[1];
1781     info->nz_unneeded  = isend[2];
1782     info->memory       = isend[3];
1783     info->mallocs      = isend[4];
1784   } else if (flag == MAT_GLOBAL_MAX) {
1785     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1786     info->nz_used      = irecv[0];
1787     info->nz_allocated = irecv[1];
1788     info->nz_unneeded  = irecv[2];
1789     info->memory       = irecv[3];
1790     info->mallocs      = irecv[4];
1791   } else if (flag == MAT_GLOBAL_SUM) {
1792     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1793     info->nz_used      = irecv[0];
1794     info->nz_allocated = irecv[1];
1795     info->nz_unneeded  = irecv[2];
1796     info->memory       = irecv[3];
1797     info->mallocs      = irecv[4];
1798   }
1799   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1800   info->fill_ratio_needed = 0;
1801   info->factor_mallocs    = 0;
1802   PetscFunctionReturn(0);
1803 }
1804 
1805 #undef __FUNCT__
1806 #define __FUNCT__ "MatSetOption_MPIAIJ"
1807 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1808 {
1809   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1810   PetscErrorCode ierr;
1811 
1812   PetscFunctionBegin;
1813   switch (op) {
1814   case MAT_NEW_NONZERO_LOCATIONS:
1815   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1816   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1817   case MAT_KEEP_NONZERO_PATTERN:
1818   case MAT_NEW_NONZERO_LOCATION_ERR:
1819   case MAT_USE_INODES:
1820   case MAT_IGNORE_ZERO_ENTRIES:
1821     MatCheckPreallocated(A,1);
1822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1823     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1824     break;
1825   case MAT_ROW_ORIENTED:
1826     a->roworiented = flg;
1827     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1828     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1829     break;
1830   case MAT_NEW_DIAGONALS:
1831     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1832     break;
1833   case MAT_IGNORE_OFF_PROC_ENTRIES:
1834     a->donotstash = flg;
1835     break;
1836   case MAT_SPD:
1837     A->spd_set                         = PETSC_TRUE;
1838     A->spd                             = flg;
1839     if (flg) {
1840       A->symmetric                     = PETSC_TRUE;
1841       A->structurally_symmetric        = PETSC_TRUE;
1842       A->symmetric_set                 = PETSC_TRUE;
1843       A->structurally_symmetric_set    = PETSC_TRUE;
1844     }
1845     break;
1846   case MAT_SYMMETRIC:
1847     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1848     break;
1849   case MAT_STRUCTURALLY_SYMMETRIC:
1850     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1851     break;
1852   case MAT_HERMITIAN:
1853     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1854     break;
1855   case MAT_SYMMETRY_ETERNAL:
1856     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1857     break;
1858   default:
1859     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1860   }
1861   PetscFunctionReturn(0);
1862 }
1863 
1864 #undef __FUNCT__
1865 #define __FUNCT__ "MatGetRow_MPIAIJ"
1866 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1867 {
1868   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1869   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1870   PetscErrorCode ierr;
1871   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1872   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1873   PetscInt       *cmap,*idx_p;
1874 
1875   PetscFunctionBegin;
1876   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1877   mat->getrowactive = PETSC_TRUE;
1878 
1879   if (!mat->rowvalues && (idx || v)) {
1880     /*
1881         allocate enough space to hold information from the longest row.
1882     */
1883     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1884     PetscInt   max = 1,tmp;
1885     for (i=0; i<matin->rmap->n; i++) {
1886       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1887       if (max < tmp) { max = tmp; }
1888     }
1889     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1890   }
1891 
1892   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1893   lrow = row - rstart;
1894 
1895   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1896   if (!v)   {pvA = 0; pvB = 0;}
1897   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1898   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1899   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1900   nztot = nzA + nzB;
1901 
1902   cmap  = mat->garray;
1903   if (v  || idx) {
1904     if (nztot) {
1905       /* Sort by increasing column numbers, assuming A and B already sorted */
1906       PetscInt imark = -1;
1907       if (v) {
1908         *v = v_p = mat->rowvalues;
1909         for (i=0; i<nzB; i++) {
1910           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1911           else break;
1912         }
1913         imark = i;
1914         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1915         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1916       }
1917       if (idx) {
1918         *idx = idx_p = mat->rowindices;
1919         if (imark > -1) {
1920           for (i=0; i<imark; i++) {
1921             idx_p[i] = cmap[cworkB[i]];
1922           }
1923         } else {
1924           for (i=0; i<nzB; i++) {
1925             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1926             else break;
1927           }
1928           imark = i;
1929         }
1930         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1931         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1932       }
1933     } else {
1934       if (idx) *idx = 0;
1935       if (v)   *v   = 0;
1936     }
1937   }
1938   *nz = nztot;
1939   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1940   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1941   PetscFunctionReturn(0);
1942 }
1943 
1944 #undef __FUNCT__
1945 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1946 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1947 {
1948   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1949 
1950   PetscFunctionBegin;
1951   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1952   aij->getrowactive = PETSC_FALSE;
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 #undef __FUNCT__
1957 #define __FUNCT__ "MatNorm_MPIAIJ"
1958 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1959 {
1960   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1961   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1962   PetscErrorCode ierr;
1963   PetscInt       i,j,cstart = mat->cmap->rstart;
1964   PetscReal      sum = 0.0;
1965   MatScalar      *v;
1966 
1967   PetscFunctionBegin;
1968   if (aij->size == 1) {
1969     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1970   } else {
1971     if (type == NORM_FROBENIUS) {
1972       v = amat->a;
1973       for (i=0; i<amat->nz; i++) {
1974 #if defined(PETSC_USE_COMPLEX)
1975         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1976 #else
1977         sum += (*v)*(*v); v++;
1978 #endif
1979       }
1980       v = bmat->a;
1981       for (i=0; i<bmat->nz; i++) {
1982 #if defined(PETSC_USE_COMPLEX)
1983         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1984 #else
1985         sum += (*v)*(*v); v++;
1986 #endif
1987       }
1988       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1989       *norm = PetscSqrtReal(*norm);
1990     } else if (type == NORM_1) { /* max column norm */
1991       PetscReal *tmp,*tmp2;
1992       PetscInt  *jj,*garray = aij->garray;
1993       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1994       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1995       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1996       *norm = 0.0;
1997       v = amat->a; jj = amat->j;
1998       for (j=0; j<amat->nz; j++) {
1999         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
2000       }
2001       v = bmat->a; jj = bmat->j;
2002       for (j=0; j<bmat->nz; j++) {
2003         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2004       }
2005       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2006       for (j=0; j<mat->cmap->N; j++) {
2007         if (tmp2[j] > *norm) *norm = tmp2[j];
2008       }
2009       ierr = PetscFree(tmp);CHKERRQ(ierr);
2010       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2011     } else if (type == NORM_INFINITY) { /* max row norm */
2012       PetscReal ntemp = 0.0;
2013       for (j=0; j<aij->A->rmap->n; j++) {
2014         v = amat->a + amat->i[j];
2015         sum = 0.0;
2016         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2017           sum += PetscAbsScalar(*v); v++;
2018         }
2019         v = bmat->a + bmat->i[j];
2020         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2021           sum += PetscAbsScalar(*v); v++;
2022         }
2023         if (sum > ntemp) ntemp = sum;
2024       }
2025       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2026     } else {
2027       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2028     }
2029   }
2030   PetscFunctionReturn(0);
2031 }
2032 
2033 #undef __FUNCT__
2034 #define __FUNCT__ "MatTranspose_MPIAIJ"
2035 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2036 {
2037   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2038   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2039   PetscErrorCode ierr;
2040   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2041   PetscInt       cstart=A->cmap->rstart,ncol;
2042   Mat            B;
2043   MatScalar      *array;
2044 
2045   PetscFunctionBegin;
2046   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2047 
2048   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2049   ai = Aloc->i; aj = Aloc->j;
2050   bi = Bloc->i; bj = Bloc->j;
2051   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2052     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2053     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2054     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2055     for (i=0; i<ai[ma]; i++){
2056       d_nnz[aj[i]] ++;
2057       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2058     }
2059 
2060     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2061     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2062     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2063     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2064     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2065     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2066     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2067   } else {
2068     B = *matout;
2069     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2070     for (i=0; i<ai[ma]; i++){
2071       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2072     }
2073   }
2074 
2075   /* copy over the A part */
2076   array = Aloc->a;
2077   row = A->rmap->rstart;
2078   for (i=0; i<ma; i++) {
2079     ncol = ai[i+1]-ai[i];
2080     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2081     row++; array += ncol; aj += ncol;
2082   }
2083   aj = Aloc->j;
2084   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2085 
2086   /* copy over the B part */
2087   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2088   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2089   array = Bloc->a;
2090   row = A->rmap->rstart;
2091   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2092   cols_tmp = cols;
2093   for (i=0; i<mb; i++) {
2094     ncol = bi[i+1]-bi[i];
2095     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2096     row++; array += ncol; cols_tmp += ncol;
2097   }
2098   ierr = PetscFree(cols);CHKERRQ(ierr);
2099 
2100   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2101   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2102   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2103     *matout = B;
2104   } else {
2105     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2106   }
2107   PetscFunctionReturn(0);
2108 }
2109 
2110 #undef __FUNCT__
2111 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2112 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2113 {
2114   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2115   Mat            a = aij->A,b = aij->B;
2116   PetscErrorCode ierr;
2117   PetscInt       s1,s2,s3;
2118 
2119   PetscFunctionBegin;
2120   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2121   if (rr) {
2122     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2123     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2124     /* Overlap communication with computation. */
2125     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2126   }
2127   if (ll) {
2128     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2129     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2130     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2131   }
2132   /* scale  the diagonal block */
2133   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2134 
2135   if (rr) {
2136     /* Do a scatter end and then right scale the off-diagonal block */
2137     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2138     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2139   }
2140 
2141   PetscFunctionReturn(0);
2142 }
2143 
2144 #undef __FUNCT__
2145 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2146 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2147 {
2148   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2149   PetscErrorCode ierr;
2150 
2151   PetscFunctionBegin;
2152   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2153   PetscFunctionReturn(0);
2154 }
2155 
2156 #undef __FUNCT__
2157 #define __FUNCT__ "MatEqual_MPIAIJ"
2158 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2159 {
2160   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2161   Mat            a,b,c,d;
2162   PetscBool      flg;
2163   PetscErrorCode ierr;
2164 
2165   PetscFunctionBegin;
2166   a = matA->A; b = matA->B;
2167   c = matB->A; d = matB->B;
2168 
2169   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2170   if (flg) {
2171     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2172   }
2173   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 #undef __FUNCT__
2178 #define __FUNCT__ "MatCopy_MPIAIJ"
2179 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2180 {
2181   PetscErrorCode ierr;
2182   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2183   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2184 
2185   PetscFunctionBegin;
2186   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2187   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2188     /* because of the column compression in the off-processor part of the matrix a->B,
2189        the number of columns in a->B and b->B may be different, hence we cannot call
2190        the MatCopy() directly on the two parts. If need be, we can provide a more
2191        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2192        then copying the submatrices */
2193     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2194   } else {
2195     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2196     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2197   }
2198   PetscFunctionReturn(0);
2199 }
2200 
2201 #undef __FUNCT__
2202 #define __FUNCT__ "MatSetUp_MPIAIJ"
2203 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2204 {
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2214 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2215 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2216 {
2217   PetscInt          i,m=Y->rmap->N;
2218   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2219   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2220   const PetscInt    *xi = x->i,*yi = y->i;
2221 
2222   PetscFunctionBegin;
2223   /* Set the number of nonzeros in the new matrix */
2224   for(i=0; i<m; i++) {
2225     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2226     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2227     nnz[i] = 0;
2228     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2229       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2230       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2231       nnz[i]++;
2232     }
2233     for (; k<nzy; k++) nnz[i]++;
2234   }
2235   PetscFunctionReturn(0);
2236 }
2237 
2238 #undef __FUNCT__
2239 #define __FUNCT__ "MatAXPY_MPIAIJ"
2240 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2241 {
2242   PetscErrorCode ierr;
2243   PetscInt       i;
2244   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2245   PetscBLASInt   bnz,one=1;
2246   Mat_SeqAIJ     *x,*y;
2247 
2248   PetscFunctionBegin;
2249   if (str == SAME_NONZERO_PATTERN) {
2250     PetscScalar alpha = a;
2251     x = (Mat_SeqAIJ *)xx->A->data;
2252     y = (Mat_SeqAIJ *)yy->A->data;
2253     bnz = PetscBLASIntCast(x->nz);
2254     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2255     x = (Mat_SeqAIJ *)xx->B->data;
2256     y = (Mat_SeqAIJ *)yy->B->data;
2257     bnz = PetscBLASIntCast(x->nz);
2258     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2259   } else if (str == SUBSET_NONZERO_PATTERN) {
2260     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2261 
2262     x = (Mat_SeqAIJ *)xx->B->data;
2263     y = (Mat_SeqAIJ *)yy->B->data;
2264     if (y->xtoy && y->XtoY != xx->B) {
2265       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2266       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2267     }
2268     if (!y->xtoy) { /* get xtoy */
2269       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2270       y->XtoY = xx->B;
2271       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2272     }
2273     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2274   } else {
2275     Mat B;
2276     PetscInt *nnz_d,*nnz_o;
2277     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2278     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2279     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2280     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2281     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2282     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2283     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2284     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2285     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2286     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2287     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2288     ierr = MatHeaderReplace(Y,B);
2289     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2290     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2291   }
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2296 
2297 #undef __FUNCT__
2298 #define __FUNCT__ "MatConjugate_MPIAIJ"
2299 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2300 {
2301 #if defined(PETSC_USE_COMPLEX)
2302   PetscErrorCode ierr;
2303   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2304 
2305   PetscFunctionBegin;
2306   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2307   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2308 #else
2309   PetscFunctionBegin;
2310 #endif
2311   PetscFunctionReturn(0);
2312 }
2313 
2314 #undef __FUNCT__
2315 #define __FUNCT__ "MatRealPart_MPIAIJ"
2316 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2317 {
2318   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2319   PetscErrorCode ierr;
2320 
2321   PetscFunctionBegin;
2322   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2323   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2324   PetscFunctionReturn(0);
2325 }
2326 
2327 #undef __FUNCT__
2328 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2329 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2330 {
2331   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2332   PetscErrorCode ierr;
2333 
2334   PetscFunctionBegin;
2335   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2336   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 #ifdef PETSC_HAVE_PBGL
2341 
2342 #include <boost/parallel/mpi/bsp_process_group.hpp>
2343 #include <boost/graph/distributed/ilu_default_graph.hpp>
2344 #include <boost/graph/distributed/ilu_0_block.hpp>
2345 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2346 #include <boost/graph/distributed/petsc/interface.hpp>
2347 #include <boost/multi_array.hpp>
2348 #include <boost/parallel/distributed_property_map->hpp>
2349 
2350 #undef __FUNCT__
2351 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2352 /*
2353   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2354 */
2355 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2356 {
2357   namespace petsc = boost::distributed::petsc;
2358 
2359   namespace graph_dist = boost::graph::distributed;
2360   using boost::graph::distributed::ilu_default::process_group_type;
2361   using boost::graph::ilu_permuted;
2362 
2363   PetscBool       row_identity, col_identity;
2364   PetscContainer  c;
2365   PetscInt        m, n, M, N;
2366   PetscErrorCode  ierr;
2367 
2368   PetscFunctionBegin;
2369   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2370   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2371   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2372   if (!row_identity || !col_identity) {
2373     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2374   }
2375 
2376   process_group_type pg;
2377   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2378   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2379   lgraph_type&   level_graph = *lgraph_p;
2380   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2381 
2382   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2383   ilu_permuted(level_graph);
2384 
2385   /* put together the new matrix */
2386   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2387   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2388   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2389   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2390   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2391   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2392   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2393   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2394 
2395   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
2396   ierr = PetscContainerSetPointer(c, lgraph_p);
2397   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2398   ierr = PetscContainerDestroy(&c);
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2404 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2405 {
2406   PetscFunctionBegin;
2407   PetscFunctionReturn(0);
2408 }
2409 
2410 #undef __FUNCT__
2411 #define __FUNCT__ "MatSolve_MPIAIJ"
2412 /*
2413   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2414 */
2415 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2416 {
2417   namespace graph_dist = boost::graph::distributed;
2418 
2419   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2420   lgraph_type*   lgraph_p;
2421   PetscContainer c;
2422   PetscErrorCode ierr;
2423 
2424   PetscFunctionBegin;
2425   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2426   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2427   ierr = VecCopy(b, x);CHKERRQ(ierr);
2428 
2429   PetscScalar* array_x;
2430   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2431   PetscInt sx;
2432   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2433 
2434   PetscScalar* array_b;
2435   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2436   PetscInt sb;
2437   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2438 
2439   lgraph_type&   level_graph = *lgraph_p;
2440   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2441 
2442   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2443   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2444                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2445 
2446   typedef boost::iterator_property_map<array_ref_type::iterator,
2447                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2448   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2449                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2450 
2451   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2452 
2453   PetscFunctionReturn(0);
2454 }
2455 #endif
2456 
2457 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2458   PetscInt       nzlocal,nsends,nrecvs;
2459   PetscMPIInt    *send_rank,*recv_rank;
2460   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2461   PetscScalar    *sbuf_a,**rbuf_a;
2462   PetscErrorCode (*Destroy)(Mat);
2463 } Mat_Redundant;
2464 
2465 #undef __FUNCT__
2466 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2467 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2468 {
2469   PetscErrorCode       ierr;
2470   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2471   PetscInt             i;
2472 
2473   PetscFunctionBegin;
2474   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2475   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2476   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2477   for (i=0; i<redund->nrecvs; i++){
2478     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2479     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2480   }
2481   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2482   ierr = PetscFree(redund);CHKERRQ(ierr);
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 #undef __FUNCT__
2487 #define __FUNCT__ "MatDestroy_MatRedundant"
2488 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2489 {
2490   PetscErrorCode  ierr;
2491   PetscContainer  container;
2492   Mat_Redundant   *redund=PETSC_NULL;
2493 
2494   PetscFunctionBegin;
2495   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2496   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2497   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2498   A->ops->destroy = redund->Destroy;
2499   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2500   if (A->ops->destroy) {
2501     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2502   }
2503   PetscFunctionReturn(0);
2504 }
2505 
2506 #undef __FUNCT__
2507 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2508 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2509 {
2510   PetscMPIInt    rank,size;
2511   MPI_Comm       comm=((PetscObject)mat)->comm;
2512   PetscErrorCode ierr;
2513   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2514   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2515   PetscInt       *rowrange=mat->rmap->range;
2516   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2517   Mat            A=aij->A,B=aij->B,C=*matredundant;
2518   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2519   PetscScalar    *sbuf_a;
2520   PetscInt       nzlocal=a->nz+b->nz;
2521   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2522   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2523   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2524   MatScalar      *aworkA,*aworkB;
2525   PetscScalar    *vals;
2526   PetscMPIInt    tag1,tag2,tag3,imdex;
2527   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2528                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2529   MPI_Status     recv_status,*send_status;
2530   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2531   PetscInt       **rbuf_j=PETSC_NULL;
2532   PetscScalar    **rbuf_a=PETSC_NULL;
2533   Mat_Redundant  *redund=PETSC_NULL;
2534   PetscContainer container;
2535 
2536   PetscFunctionBegin;
2537   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2538   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2539 
2540   if (reuse == MAT_REUSE_MATRIX) {
2541     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2542     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2543     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2544     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2545     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2546     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2547     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2548     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2549 
2550     nsends    = redund->nsends;
2551     nrecvs    = redund->nrecvs;
2552     send_rank = redund->send_rank;
2553     recv_rank = redund->recv_rank;
2554     sbuf_nz   = redund->sbuf_nz;
2555     rbuf_nz   = redund->rbuf_nz;
2556     sbuf_j    = redund->sbuf_j;
2557     sbuf_a    = redund->sbuf_a;
2558     rbuf_j    = redund->rbuf_j;
2559     rbuf_a    = redund->rbuf_a;
2560   }
2561 
2562   if (reuse == MAT_INITIAL_MATRIX){
2563     PetscMPIInt  subrank,subsize;
2564     PetscInt     nleftover,np_subcomm;
2565     /* get the destination processors' id send_rank, nsends and nrecvs */
2566     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2567     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2568     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2569     np_subcomm = size/nsubcomm;
2570     nleftover  = size - nsubcomm*np_subcomm;
2571     nsends = 0; nrecvs = 0;
2572     for (i=0; i<size; i++){ /* i=rank*/
2573       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2574         send_rank[nsends] = i; nsends++;
2575         recv_rank[nrecvs++] = i;
2576       }
2577     }
2578     if (rank >= size - nleftover){/* this proc is a leftover processor */
2579       i = size-nleftover-1;
2580       j = 0;
2581       while (j < nsubcomm - nleftover){
2582         send_rank[nsends++] = i;
2583         i--; j++;
2584       }
2585     }
2586 
2587     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2588       for (i=0; i<nleftover; i++){
2589         recv_rank[nrecvs++] = size-nleftover+i;
2590       }
2591     }
2592 
2593     /* allocate sbuf_j, sbuf_a */
2594     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2595     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2596     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2597   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2598 
2599   /* copy mat's local entries into the buffers */
2600   if (reuse == MAT_INITIAL_MATRIX){
2601     rownz_max = 0;
2602     rptr = sbuf_j;
2603     cols = sbuf_j + rend-rstart + 1;
2604     vals = sbuf_a;
2605     rptr[0] = 0;
2606     for (i=0; i<rend-rstart; i++){
2607       row = i + rstart;
2608       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2609       ncols  = nzA + nzB;
2610       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2611       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2612       /* load the column indices for this row into cols */
2613       lwrite = 0;
2614       for (l=0; l<nzB; l++) {
2615         if ((ctmp = bmap[cworkB[l]]) < cstart){
2616           vals[lwrite]   = aworkB[l];
2617           cols[lwrite++] = ctmp;
2618         }
2619       }
2620       for (l=0; l<nzA; l++){
2621         vals[lwrite]   = aworkA[l];
2622         cols[lwrite++] = cstart + cworkA[l];
2623       }
2624       for (l=0; l<nzB; l++) {
2625         if ((ctmp = bmap[cworkB[l]]) >= cend){
2626           vals[lwrite]   = aworkB[l];
2627           cols[lwrite++] = ctmp;
2628         }
2629       }
2630       vals += ncols;
2631       cols += ncols;
2632       rptr[i+1] = rptr[i] + ncols;
2633       if (rownz_max < ncols) rownz_max = ncols;
2634     }
2635     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2636   } else { /* only copy matrix values into sbuf_a */
2637     rptr = sbuf_j;
2638     vals = sbuf_a;
2639     rptr[0] = 0;
2640     for (i=0; i<rend-rstart; i++){
2641       row = i + rstart;
2642       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2643       ncols  = nzA + nzB;
2644       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2645       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2646       lwrite = 0;
2647       for (l=0; l<nzB; l++) {
2648         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2649       }
2650       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2651       for (l=0; l<nzB; l++) {
2652         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2653       }
2654       vals += ncols;
2655       rptr[i+1] = rptr[i] + ncols;
2656     }
2657   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2658 
2659   /* send nzlocal to others, and recv other's nzlocal */
2660   /*--------------------------------------------------*/
2661   if (reuse == MAT_INITIAL_MATRIX){
2662     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2663     s_waits2 = s_waits3 + nsends;
2664     s_waits1 = s_waits2 + nsends;
2665     r_waits1 = s_waits1 + nsends;
2666     r_waits2 = r_waits1 + nrecvs;
2667     r_waits3 = r_waits2 + nrecvs;
2668   } else {
2669     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2670     r_waits3 = s_waits3 + nsends;
2671   }
2672 
2673   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2674   if (reuse == MAT_INITIAL_MATRIX){
2675     /* get new tags to keep the communication clean */
2676     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2677     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2678     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2679 
2680     /* post receives of other's nzlocal */
2681     for (i=0; i<nrecvs; i++){
2682       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2683     }
2684     /* send nzlocal to others */
2685     for (i=0; i<nsends; i++){
2686       sbuf_nz[i] = nzlocal;
2687       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2688     }
2689     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2690     count = nrecvs;
2691     while (count) {
2692       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2693       recv_rank[imdex] = recv_status.MPI_SOURCE;
2694       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2695       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2696 
2697       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2698       rbuf_nz[imdex] += i + 2;
2699       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2700       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2701       count--;
2702     }
2703     /* wait on sends of nzlocal */
2704     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2705     /* send mat->i,j to others, and recv from other's */
2706     /*------------------------------------------------*/
2707     for (i=0; i<nsends; i++){
2708       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2709       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2710     }
2711     /* wait on receives of mat->i,j */
2712     /*------------------------------*/
2713     count = nrecvs;
2714     while (count) {
2715       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2716       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2717       count--;
2718     }
2719     /* wait on sends of mat->i,j */
2720     /*---------------------------*/
2721     if (nsends) {
2722       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2723     }
2724   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2725 
2726   /* post receives, send and receive mat->a */
2727   /*----------------------------------------*/
2728   for (imdex=0; imdex<nrecvs; imdex++) {
2729     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2730   }
2731   for (i=0; i<nsends; i++){
2732     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2733   }
2734   count = nrecvs;
2735   while (count) {
2736     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2737     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2738     count--;
2739   }
2740   if (nsends) {
2741     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2742   }
2743 
2744   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2745 
2746   /* create redundant matrix */
2747   /*-------------------------*/
2748   if (reuse == MAT_INITIAL_MATRIX){
2749     /* compute rownz_max for preallocation */
2750     for (imdex=0; imdex<nrecvs; imdex++){
2751       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2752       rptr = rbuf_j[imdex];
2753       for (i=0; i<j; i++){
2754         ncols = rptr[i+1] - rptr[i];
2755         if (rownz_max < ncols) rownz_max = ncols;
2756       }
2757     }
2758 
2759     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2760     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2761     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2762     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2763     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2764     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2765   } else {
2766     C = *matredundant;
2767   }
2768 
2769   /* insert local matrix entries */
2770   rptr = sbuf_j;
2771   cols = sbuf_j + rend-rstart + 1;
2772   vals = sbuf_a;
2773   for (i=0; i<rend-rstart; i++){
2774     row   = i + rstart;
2775     ncols = rptr[i+1] - rptr[i];
2776     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2777     vals += ncols;
2778     cols += ncols;
2779   }
2780   /* insert received matrix entries */
2781   for (imdex=0; imdex<nrecvs; imdex++){
2782     rstart = rowrange[recv_rank[imdex]];
2783     rend   = rowrange[recv_rank[imdex]+1];
2784     rptr = rbuf_j[imdex];
2785     cols = rbuf_j[imdex] + rend-rstart + 1;
2786     vals = rbuf_a[imdex];
2787     for (i=0; i<rend-rstart; i++){
2788       row   = i + rstart;
2789       ncols = rptr[i+1] - rptr[i];
2790       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2791       vals += ncols;
2792       cols += ncols;
2793     }
2794   }
2795   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2796   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2797   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2798   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2799   if (reuse == MAT_INITIAL_MATRIX) {
2800     PetscContainer container;
2801     *matredundant = C;
2802     /* create a supporting struct and attach it to C for reuse */
2803     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2804     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2805     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2806     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2807     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2808     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2809 
2810     redund->nzlocal = nzlocal;
2811     redund->nsends  = nsends;
2812     redund->nrecvs  = nrecvs;
2813     redund->send_rank = send_rank;
2814     redund->recv_rank = recv_rank;
2815     redund->sbuf_nz = sbuf_nz;
2816     redund->rbuf_nz = rbuf_nz;
2817     redund->sbuf_j  = sbuf_j;
2818     redund->sbuf_a  = sbuf_a;
2819     redund->rbuf_j  = rbuf_j;
2820     redund->rbuf_a  = rbuf_a;
2821 
2822     redund->Destroy = C->ops->destroy;
2823     C->ops->destroy = MatDestroy_MatRedundant;
2824   }
2825   PetscFunctionReturn(0);
2826 }
2827 
2828 #undef __FUNCT__
2829 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2830 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2831 {
2832   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2833   PetscErrorCode ierr;
2834   PetscInt       i,*idxb = 0;
2835   PetscScalar    *va,*vb;
2836   Vec            vtmp;
2837 
2838   PetscFunctionBegin;
2839   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2840   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2841   if (idx) {
2842     for (i=0; i<A->rmap->n; i++) {
2843       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2844     }
2845   }
2846 
2847   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2848   if (idx) {
2849     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2850   }
2851   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2852   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2853 
2854   for (i=0; i<A->rmap->n; i++){
2855     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2856       va[i] = vb[i];
2857       if (idx) idx[i] = a->garray[idxb[i]];
2858     }
2859   }
2860 
2861   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2862   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2863   ierr = PetscFree(idxb);CHKERRQ(ierr);
2864   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2865   PetscFunctionReturn(0);
2866 }
2867 
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2870 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2871 {
2872   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2873   PetscErrorCode ierr;
2874   PetscInt       i,*idxb = 0;
2875   PetscScalar    *va,*vb;
2876   Vec            vtmp;
2877 
2878   PetscFunctionBegin;
2879   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2880   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2881   if (idx) {
2882     for (i=0; i<A->cmap->n; i++) {
2883       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2884     }
2885   }
2886 
2887   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2888   if (idx) {
2889     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2890   }
2891   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2892   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2893 
2894   for (i=0; i<A->rmap->n; i++){
2895     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2896       va[i] = vb[i];
2897       if (idx) idx[i] = a->garray[idxb[i]];
2898     }
2899   }
2900 
2901   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2902   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2903   ierr = PetscFree(idxb);CHKERRQ(ierr);
2904   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2905   PetscFunctionReturn(0);
2906 }
2907 
2908 #undef __FUNCT__
2909 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2910 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2911 {
2912   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2913   PetscInt       n      = A->rmap->n;
2914   PetscInt       cstart = A->cmap->rstart;
2915   PetscInt      *cmap   = mat->garray;
2916   PetscInt      *diagIdx, *offdiagIdx;
2917   Vec            diagV, offdiagV;
2918   PetscScalar   *a, *diagA, *offdiagA;
2919   PetscInt       r;
2920   PetscErrorCode ierr;
2921 
2922   PetscFunctionBegin;
2923   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2924   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2925   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2926   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2927   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2928   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2929   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2930   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2931   for(r = 0; r < n; ++r) {
2932     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2933       a[r]   = diagA[r];
2934       idx[r] = cstart + diagIdx[r];
2935     } else {
2936       a[r]   = offdiagA[r];
2937       idx[r] = cmap[offdiagIdx[r]];
2938     }
2939   }
2940   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2941   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2942   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2943   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2944   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2945   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2946   PetscFunctionReturn(0);
2947 }
2948 
2949 #undef __FUNCT__
2950 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2951 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2952 {
2953   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2954   PetscInt       n      = A->rmap->n;
2955   PetscInt       cstart = A->cmap->rstart;
2956   PetscInt      *cmap   = mat->garray;
2957   PetscInt      *diagIdx, *offdiagIdx;
2958   Vec            diagV, offdiagV;
2959   PetscScalar   *a, *diagA, *offdiagA;
2960   PetscInt       r;
2961   PetscErrorCode ierr;
2962 
2963   PetscFunctionBegin;
2964   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2965   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2966   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2967   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2968   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2969   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2970   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2971   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2972   for(r = 0; r < n; ++r) {
2973     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2974       a[r]   = diagA[r];
2975       idx[r] = cstart + diagIdx[r];
2976     } else {
2977       a[r]   = offdiagA[r];
2978       idx[r] = cmap[offdiagIdx[r]];
2979     }
2980   }
2981   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2982   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2983   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2984   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2985   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2986   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2987   PetscFunctionReturn(0);
2988 }
2989 
2990 #undef __FUNCT__
2991 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2992 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2993 {
2994   PetscErrorCode ierr;
2995   Mat            *dummy;
2996 
2997   PetscFunctionBegin;
2998   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2999   *newmat = *dummy;
3000   ierr = PetscFree(dummy);CHKERRQ(ierr);
3001   PetscFunctionReturn(0);
3002 }
3003 
3004 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3005 
3006 #undef __FUNCT__
3007 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3008 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3009 {
3010   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3011   PetscErrorCode ierr;
3012 
3013   PetscFunctionBegin;
3014   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3015   PetscFunctionReturn(0);
3016 }
3017 
3018 #undef __FUNCT__
3019 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3020 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3021 {
3022   PetscErrorCode ierr;
3023   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3024 
3025   PetscFunctionBegin;
3026   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3027   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3028   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3029   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3030   PetscFunctionReturn(0);
3031 }
3032 
3033 /* -------------------------------------------------------------------*/
3034 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3035        MatGetRow_MPIAIJ,
3036        MatRestoreRow_MPIAIJ,
3037        MatMult_MPIAIJ,
3038 /* 4*/ MatMultAdd_MPIAIJ,
3039        MatMultTranspose_MPIAIJ,
3040        MatMultTransposeAdd_MPIAIJ,
3041 #ifdef PETSC_HAVE_PBGL
3042        MatSolve_MPIAIJ,
3043 #else
3044        0,
3045 #endif
3046        0,
3047        0,
3048 /*10*/ 0,
3049        0,
3050        0,
3051        MatSOR_MPIAIJ,
3052        MatTranspose_MPIAIJ,
3053 /*15*/ MatGetInfo_MPIAIJ,
3054        MatEqual_MPIAIJ,
3055        MatGetDiagonal_MPIAIJ,
3056        MatDiagonalScale_MPIAIJ,
3057        MatNorm_MPIAIJ,
3058 /*20*/ MatAssemblyBegin_MPIAIJ,
3059        MatAssemblyEnd_MPIAIJ,
3060        MatSetOption_MPIAIJ,
3061        MatZeroEntries_MPIAIJ,
3062 /*24*/ MatZeroRows_MPIAIJ,
3063        0,
3064 #ifdef PETSC_HAVE_PBGL
3065        0,
3066 #else
3067        0,
3068 #endif
3069        0,
3070        0,
3071 /*29*/ MatSetUp_MPIAIJ,
3072 #ifdef PETSC_HAVE_PBGL
3073        0,
3074 #else
3075        0,
3076 #endif
3077        0,
3078        0,
3079        0,
3080 /*34*/ MatDuplicate_MPIAIJ,
3081        0,
3082        0,
3083        0,
3084        0,
3085 /*39*/ MatAXPY_MPIAIJ,
3086        MatGetSubMatrices_MPIAIJ,
3087        MatIncreaseOverlap_MPIAIJ,
3088        MatGetValues_MPIAIJ,
3089        MatCopy_MPIAIJ,
3090 /*44*/ MatGetRowMax_MPIAIJ,
3091        MatScale_MPIAIJ,
3092        0,
3093        0,
3094        MatZeroRowsColumns_MPIAIJ,
3095 /*49*/ MatSetRandom_MPIAIJ,
3096        0,
3097        0,
3098        0,
3099        0,
3100 /*54*/ MatFDColoringCreate_MPIAIJ,
3101        0,
3102        MatSetUnfactored_MPIAIJ,
3103        0, /* MatPermute_MPIAIJ, impl currently broken */
3104        0,
3105 /*59*/ MatGetSubMatrix_MPIAIJ,
3106        MatDestroy_MPIAIJ,
3107        MatView_MPIAIJ,
3108        0,
3109        0,
3110 /*64*/ 0,
3111        0,
3112        0,
3113        0,
3114        0,
3115 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3116        MatGetRowMinAbs_MPIAIJ,
3117        0,
3118        MatSetColoring_MPIAIJ,
3119 #if defined(PETSC_HAVE_ADIC)
3120        MatSetValuesAdic_MPIAIJ,
3121 #else
3122        0,
3123 #endif
3124        MatSetValuesAdifor_MPIAIJ,
3125 /*75*/ MatFDColoringApply_AIJ,
3126        0,
3127        0,
3128        0,
3129        MatFindZeroDiagonals_MPIAIJ,
3130 /*80*/ 0,
3131        0,
3132        0,
3133 /*83*/ MatLoad_MPIAIJ,
3134        0,
3135        0,
3136        0,
3137        0,
3138        0,
3139 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3140        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3141        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3142        MatPtAP_Basic,
3143        MatPtAPSymbolic_MPIAIJ,
3144 /*94*/ MatPtAPNumeric_MPIAIJ,
3145        0,
3146        0,
3147        0,
3148        0,
3149 /*99*/ 0,
3150        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3151        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3152        MatConjugate_MPIAIJ,
3153        0,
3154 /*104*/MatSetValuesRow_MPIAIJ,
3155        MatRealPart_MPIAIJ,
3156        MatImaginaryPart_MPIAIJ,
3157        0,
3158        0,
3159 /*109*/0,
3160        MatGetRedundantMatrix_MPIAIJ,
3161        MatGetRowMin_MPIAIJ,
3162        0,
3163        0,
3164 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3165        0,
3166        0,
3167        0,
3168        0,
3169 /*119*/0,
3170        0,
3171        0,
3172        0,
3173        MatGetMultiProcBlock_MPIAIJ,
3174 /*124*/MatFindNonzeroRows_MPIAIJ,
3175        MatGetColumnNorms_MPIAIJ,
3176        MatInvertBlockDiagonal_MPIAIJ,
3177        0,
3178        MatGetSubMatricesParallel_MPIAIJ,
3179 /*129*/0,
3180        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3181        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3182        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3183        0,
3184 /*134*/0,
3185        0,
3186        0,
3187        0,
3188        0
3189 };
3190 
3191 /* ----------------------------------------------------------------------------------------*/
3192 
3193 EXTERN_C_BEGIN
3194 #undef __FUNCT__
3195 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3196 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3197 {
3198   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3199   PetscErrorCode ierr;
3200 
3201   PetscFunctionBegin;
3202   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3203   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3204   PetscFunctionReturn(0);
3205 }
3206 EXTERN_C_END
3207 
3208 EXTERN_C_BEGIN
3209 #undef __FUNCT__
3210 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3211 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3212 {
3213   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3214   PetscErrorCode ierr;
3215 
3216   PetscFunctionBegin;
3217   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3218   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3219   PetscFunctionReturn(0);
3220 }
3221 EXTERN_C_END
3222 
3223 EXTERN_C_BEGIN
3224 #undef __FUNCT__
3225 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3226 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3227 {
3228   Mat_MPIAIJ     *b;
3229   PetscErrorCode ierr;
3230   PetscInt       i;
3231   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3232 
3233   PetscFunctionBegin;
3234   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3235   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3236   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3237   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3238   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3239   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3240 
3241   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3242   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3243   if (d_nnz) {
3244     for (i=0; i<B->rmap->n; i++) {
3245       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3246     }
3247   }
3248   if (o_nnz) {
3249     for (i=0; i<B->rmap->n; i++) {
3250       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3251     }
3252   }
3253   b = (Mat_MPIAIJ*)B->data;
3254 
3255   if (!B->preallocated) {
3256     /* Explicitly create 2 MATSEQAIJ matrices. */
3257     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3258     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3259     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3260     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3261     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3262     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3263     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3264     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3265     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3266     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3267   }
3268 
3269   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3270   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3271   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3272   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3273   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3274   B->preallocated = PETSC_TRUE;
3275   PetscFunctionReturn(0);
3276 }
3277 EXTERN_C_END
3278 
3279 #undef __FUNCT__
3280 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3281 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3282 {
3283   Mat            mat;
3284   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3285   PetscErrorCode ierr;
3286 
3287   PetscFunctionBegin;
3288   *newmat       = 0;
3289   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3290   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3291   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3292   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3293   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3294   a    = (Mat_MPIAIJ*)mat->data;
3295 
3296   mat->factortype    = matin->factortype;
3297   mat->rmap->bs      = matin->rmap->bs;
3298   mat->cmap->bs      = matin->cmap->bs;
3299   mat->assembled    = PETSC_TRUE;
3300   mat->insertmode   = NOT_SET_VALUES;
3301   mat->preallocated = PETSC_TRUE;
3302 
3303   a->size           = oldmat->size;
3304   a->rank           = oldmat->rank;
3305   a->donotstash     = oldmat->donotstash;
3306   a->roworiented    = oldmat->roworiented;
3307   a->rowindices     = 0;
3308   a->rowvalues      = 0;
3309   a->getrowactive   = PETSC_FALSE;
3310 
3311   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3312   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3313 
3314   if (oldmat->colmap) {
3315 #if defined (PETSC_USE_CTABLE)
3316     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3317 #else
3318     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3319     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3320     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3321 #endif
3322   } else a->colmap = 0;
3323   if (oldmat->garray) {
3324     PetscInt len;
3325     len  = oldmat->B->cmap->n;
3326     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3327     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3328     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3329   } else a->garray = 0;
3330 
3331   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3332   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3333   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3334   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3335   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3336   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3337   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3338   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3339   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3340   *newmat = mat;
3341   PetscFunctionReturn(0);
3342 }
3343 
3344 
3345 
3346 #undef __FUNCT__
3347 #define __FUNCT__ "MatLoad_MPIAIJ"
3348 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3349 {
3350   PetscScalar    *vals,*svals;
3351   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3352   PetscErrorCode ierr;
3353   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3354   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3355   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3356   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3357   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3358   int            fd;
3359   PetscInt       bs = 1;
3360 
3361   PetscFunctionBegin;
3362   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3363   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3364   if (!rank) {
3365     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3366     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3367     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3368   }
3369 
3370   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3371   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3372   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3373 
3374   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3375 
3376   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3377   M = header[1]; N = header[2];
3378   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3379   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3380   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3381 
3382   /* If global sizes are set, check if they are consistent with that given in the file */
3383   if (sizesset) {
3384     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3385   }
3386   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3387   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3388 
3389   /* determine ownership of all (block) rows */
3390   if( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3391   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3392   else m = newMat->rmap->n; /* Set by user */
3393 
3394   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3395   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3396 
3397   /* First process needs enough room for process with most rows */
3398   if (!rank) {
3399     mmax = rowners[1];
3400     for(i=2; i<=size; i++) {
3401       mmax = PetscMax(mmax, rowners[i]);
3402     }
3403   } else mmax = m;
3404 
3405   rowners[0] = 0;
3406   for (i=2; i<=size; i++) {
3407     rowners[i] += rowners[i-1];
3408   }
3409   rstart = rowners[rank];
3410   rend   = rowners[rank+1];
3411 
3412   /* distribute row lengths to all processors */
3413   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3414   if (!rank) {
3415     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3416     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3417     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3418     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3419     for (j=0; j<m; j++) {
3420       procsnz[0] += ourlens[j];
3421     }
3422     for (i=1; i<size; i++) {
3423       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3424       /* calculate the number of nonzeros on each processor */
3425       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3426         procsnz[i] += rowlengths[j];
3427       }
3428       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3429     }
3430     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3431   } else {
3432     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3433   }
3434 
3435   if (!rank) {
3436     /* determine max buffer needed and allocate it */
3437     maxnz = 0;
3438     for (i=0; i<size; i++) {
3439       maxnz = PetscMax(maxnz,procsnz[i]);
3440     }
3441     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3442 
3443     /* read in my part of the matrix column indices  */
3444     nz   = procsnz[0];
3445     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3446     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3447 
3448     /* read in every one elses and ship off */
3449     for (i=1; i<size; i++) {
3450       nz     = procsnz[i];
3451       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3452       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3453     }
3454     ierr = PetscFree(cols);CHKERRQ(ierr);
3455   } else {
3456     /* determine buffer space needed for message */
3457     nz = 0;
3458     for (i=0; i<m; i++) {
3459       nz += ourlens[i];
3460     }
3461     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3462 
3463     /* receive message of column indices*/
3464     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3465   }
3466 
3467   /* determine column ownership if matrix is not square */
3468   if (N != M) {
3469     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3470     else n = newMat->cmap->n;
3471     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3472     cstart = cend - n;
3473   } else {
3474     cstart = rstart;
3475     cend   = rend;
3476     n      = cend - cstart;
3477   }
3478 
3479   /* loop over local rows, determining number of off diagonal entries */
3480   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3481   jj = 0;
3482   for (i=0; i<m; i++) {
3483     for (j=0; j<ourlens[i]; j++) {
3484       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3485       jj++;
3486     }
3487   }
3488 
3489   for (i=0; i<m; i++) {
3490     ourlens[i] -= offlens[i];
3491   }
3492   if (!sizesset) {
3493     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3494   }
3495 
3496   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3497 
3498   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3499 
3500   for (i=0; i<m; i++) {
3501     ourlens[i] += offlens[i];
3502   }
3503 
3504   if (!rank) {
3505     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3506 
3507     /* read in my part of the matrix numerical values  */
3508     nz   = procsnz[0];
3509     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3510 
3511     /* insert into matrix */
3512     jj      = rstart;
3513     smycols = mycols;
3514     svals   = vals;
3515     for (i=0; i<m; i++) {
3516       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3517       smycols += ourlens[i];
3518       svals   += ourlens[i];
3519       jj++;
3520     }
3521 
3522     /* read in other processors and ship out */
3523     for (i=1; i<size; i++) {
3524       nz     = procsnz[i];
3525       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3526       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3527     }
3528     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3529   } else {
3530     /* receive numeric values */
3531     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3532 
3533     /* receive message of values*/
3534     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3535 
3536     /* insert into matrix */
3537     jj      = rstart;
3538     smycols = mycols;
3539     svals   = vals;
3540     for (i=0; i<m; i++) {
3541       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3542       smycols += ourlens[i];
3543       svals   += ourlens[i];
3544       jj++;
3545     }
3546   }
3547   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3548   ierr = PetscFree(vals);CHKERRQ(ierr);
3549   ierr = PetscFree(mycols);CHKERRQ(ierr);
3550   ierr = PetscFree(rowners);CHKERRQ(ierr);
3551   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3552   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3553 
3554   PetscFunctionReturn(0);
3555 }
3556 
3557 #undef __FUNCT__
3558 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3559 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3560 {
3561   PetscErrorCode ierr;
3562   IS             iscol_local;
3563   PetscInt       csize;
3564 
3565   PetscFunctionBegin;
3566   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3567   if (call == MAT_REUSE_MATRIX) {
3568     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3569     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3570   } else {
3571     PetscInt cbs;
3572     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3573     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3574     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3575   }
3576   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3577   if (call == MAT_INITIAL_MATRIX) {
3578     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3579     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3580   }
3581   PetscFunctionReturn(0);
3582 }
3583 
3584 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3585 #undef __FUNCT__
3586 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3587 /*
3588     Not great since it makes two copies of the submatrix, first an SeqAIJ
3589   in local and then by concatenating the local matrices the end result.
3590   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3591 
3592   Note: This requires a sequential iscol with all indices.
3593 */
3594 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3595 {
3596   PetscErrorCode ierr;
3597   PetscMPIInt    rank,size;
3598   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3599   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3600   PetscBool      allcolumns, colflag;
3601   Mat            M,Mreuse;
3602   MatScalar      *vwork,*aa;
3603   MPI_Comm       comm = ((PetscObject)mat)->comm;
3604   Mat_SeqAIJ     *aij;
3605 
3606 
3607   PetscFunctionBegin;
3608   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3609   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3610 
3611   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3612   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3613   if (colflag && ncol == mat->cmap->N){
3614     allcolumns = PETSC_TRUE;
3615   } else {
3616     allcolumns = PETSC_FALSE;
3617   }
3618   if (call ==  MAT_REUSE_MATRIX) {
3619     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3620     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3621     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3622   } else {
3623     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3624   }
3625 
3626   /*
3627       m - number of local rows
3628       n - number of columns (same on all processors)
3629       rstart - first row in new global matrix generated
3630   */
3631   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3632   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3633   if (call == MAT_INITIAL_MATRIX) {
3634     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3635     ii  = aij->i;
3636     jj  = aij->j;
3637 
3638     /*
3639         Determine the number of non-zeros in the diagonal and off-diagonal
3640         portions of the matrix in order to do correct preallocation
3641     */
3642 
3643     /* first get start and end of "diagonal" columns */
3644     if (csize == PETSC_DECIDE) {
3645       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3646       if (mglobal == n) { /* square matrix */
3647 	nlocal = m;
3648       } else {
3649         nlocal = n/size + ((n % size) > rank);
3650       }
3651     } else {
3652       nlocal = csize;
3653     }
3654     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3655     rstart = rend - nlocal;
3656     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3657 
3658     /* next, compute all the lengths */
3659     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3660     olens = dlens + m;
3661     for (i=0; i<m; i++) {
3662       jend = ii[i+1] - ii[i];
3663       olen = 0;
3664       dlen = 0;
3665       for (j=0; j<jend; j++) {
3666         if (*jj < rstart || *jj >= rend) olen++;
3667         else dlen++;
3668         jj++;
3669       }
3670       olens[i] = olen;
3671       dlens[i] = dlen;
3672     }
3673     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3674     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3675     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3676     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3677     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3678     ierr = PetscFree(dlens);CHKERRQ(ierr);
3679   } else {
3680     PetscInt ml,nl;
3681 
3682     M = *newmat;
3683     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3684     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3685     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3686     /*
3687          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3688        rather than the slower MatSetValues().
3689     */
3690     M->was_assembled = PETSC_TRUE;
3691     M->assembled     = PETSC_FALSE;
3692   }
3693   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3694   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3695   ii  = aij->i;
3696   jj  = aij->j;
3697   aa  = aij->a;
3698   for (i=0; i<m; i++) {
3699     row   = rstart + i;
3700     nz    = ii[i+1] - ii[i];
3701     cwork = jj;     jj += nz;
3702     vwork = aa;     aa += nz;
3703     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3704   }
3705 
3706   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3707   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3708   *newmat = M;
3709 
3710   /* save submatrix used in processor for next request */
3711   if (call ==  MAT_INITIAL_MATRIX) {
3712     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3713     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3714   }
3715 
3716   PetscFunctionReturn(0);
3717 }
3718 
3719 EXTERN_C_BEGIN
3720 #undef __FUNCT__
3721 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3722 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3723 {
3724   PetscInt       m,cstart, cend,j,nnz,i,d;
3725   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3726   const PetscInt *JJ;
3727   PetscScalar    *values;
3728   PetscErrorCode ierr;
3729 
3730   PetscFunctionBegin;
3731   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3732 
3733   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3734   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3735   m      = B->rmap->n;
3736   cstart = B->cmap->rstart;
3737   cend   = B->cmap->rend;
3738   rstart = B->rmap->rstart;
3739 
3740   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3741 
3742 #if defined(PETSC_USE_DEBUGGING)
3743   for (i=0; i<m; i++) {
3744     nnz     = Ii[i+1]- Ii[i];
3745     JJ      = J + Ii[i];
3746     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3747     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3748     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3749   }
3750 #endif
3751 
3752   for (i=0; i<m; i++) {
3753     nnz     = Ii[i+1]- Ii[i];
3754     JJ      = J + Ii[i];
3755     nnz_max = PetscMax(nnz_max,nnz);
3756     d       = 0;
3757     for (j=0; j<nnz; j++) {
3758       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3759     }
3760     d_nnz[i] = d;
3761     o_nnz[i] = nnz - d;
3762   }
3763   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3764   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3765 
3766   if (v) values = (PetscScalar*)v;
3767   else {
3768     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3769     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3770   }
3771 
3772   for (i=0; i<m; i++) {
3773     ii   = i + rstart;
3774     nnz  = Ii[i+1]- Ii[i];
3775     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3776   }
3777   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3778   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3779 
3780   if (!v) {
3781     ierr = PetscFree(values);CHKERRQ(ierr);
3782   }
3783   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3784   PetscFunctionReturn(0);
3785 }
3786 EXTERN_C_END
3787 
3788 #undef __FUNCT__
3789 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3790 /*@
3791    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3792    (the default parallel PETSc format).
3793 
3794    Collective on MPI_Comm
3795 
3796    Input Parameters:
3797 +  B - the matrix
3798 .  i - the indices into j for the start of each local row (starts with zero)
3799 .  j - the column indices for each local row (starts with zero)
3800 -  v - optional values in the matrix
3801 
3802    Level: developer
3803 
3804    Notes:
3805        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3806      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3807      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3808 
3809        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3810 
3811        The format which is used for the sparse matrix input, is equivalent to a
3812     row-major ordering.. i.e for the following matrix, the input data expected is
3813     as shown:
3814 
3815         1 0 0
3816         2 0 3     P0
3817        -------
3818         4 5 6     P1
3819 
3820      Process0 [P0]: rows_owned=[0,1]
3821         i =  {0,1,3}  [size = nrow+1  = 2+1]
3822         j =  {0,0,2}  [size = nz = 6]
3823         v =  {1,2,3}  [size = nz = 6]
3824 
3825      Process1 [P1]: rows_owned=[2]
3826         i =  {0,3}    [size = nrow+1  = 1+1]
3827         j =  {0,1,2}  [size = nz = 6]
3828         v =  {4,5,6}  [size = nz = 6]
3829 
3830 .keywords: matrix, aij, compressed row, sparse, parallel
3831 
3832 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3833           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3834 @*/
3835 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3836 {
3837   PetscErrorCode ierr;
3838 
3839   PetscFunctionBegin;
3840   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3841   PetscFunctionReturn(0);
3842 }
3843 
3844 #undef __FUNCT__
3845 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3846 /*@C
3847    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3848    (the default parallel PETSc format).  For good matrix assembly performance
3849    the user should preallocate the matrix storage by setting the parameters
3850    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3851    performance can be increased by more than a factor of 50.
3852 
3853    Collective on MPI_Comm
3854 
3855    Input Parameters:
3856 +  A - the matrix
3857 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3858            (same value is used for all local rows)
3859 .  d_nnz - array containing the number of nonzeros in the various rows of the
3860            DIAGONAL portion of the local submatrix (possibly different for each row)
3861            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3862            The size of this array is equal to the number of local rows, i.e 'm'.
3863            For matrices that will be factored, you must leave room for (and set)
3864            the diagonal entry even if it is zero.
3865 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3866            submatrix (same value is used for all local rows).
3867 -  o_nnz - array containing the number of nonzeros in the various rows of the
3868            OFF-DIAGONAL portion of the local submatrix (possibly different for
3869            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3870            structure. The size of this array is equal to the number
3871            of local rows, i.e 'm'.
3872 
3873    If the *_nnz parameter is given then the *_nz parameter is ignored
3874 
3875    The AIJ format (also called the Yale sparse matrix format or
3876    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3877    storage.  The stored row and column indices begin with zero.
3878    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3879 
3880    The parallel matrix is partitioned such that the first m0 rows belong to
3881    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3882    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3883 
3884    The DIAGONAL portion of the local submatrix of a processor can be defined
3885    as the submatrix which is obtained by extraction the part corresponding to
3886    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3887    first row that belongs to the processor, r2 is the last row belonging to
3888    the this processor, and c1-c2 is range of indices of the local part of a
3889    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3890    common case of a square matrix, the row and column ranges are the same and
3891    the DIAGONAL part is also square. The remaining portion of the local
3892    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3893 
3894    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3895 
3896    You can call MatGetInfo() to get information on how effective the preallocation was;
3897    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3898    You can also run with the option -info and look for messages with the string
3899    malloc in them to see if additional memory allocation was needed.
3900 
3901    Example usage:
3902 
3903    Consider the following 8x8 matrix with 34 non-zero values, that is
3904    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3905    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3906    as follows:
3907 
3908 .vb
3909             1  2  0  |  0  3  0  |  0  4
3910     Proc0   0  5  6  |  7  0  0  |  8  0
3911             9  0 10  | 11  0  0  | 12  0
3912     -------------------------------------
3913            13  0 14  | 15 16 17  |  0  0
3914     Proc1   0 18  0  | 19 20 21  |  0  0
3915             0  0  0  | 22 23  0  | 24  0
3916     -------------------------------------
3917     Proc2  25 26 27  |  0  0 28  | 29  0
3918            30  0  0  | 31 32 33  |  0 34
3919 .ve
3920 
3921    This can be represented as a collection of submatrices as:
3922 
3923 .vb
3924       A B C
3925       D E F
3926       G H I
3927 .ve
3928 
3929    Where the submatrices A,B,C are owned by proc0, D,E,F are
3930    owned by proc1, G,H,I are owned by proc2.
3931 
3932    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3933    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3934    The 'M','N' parameters are 8,8, and have the same values on all procs.
3935 
3936    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3937    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3938    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3939    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3940    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3941    matrix, ans [DF] as another SeqAIJ matrix.
3942 
3943    When d_nz, o_nz parameters are specified, d_nz storage elements are
3944    allocated for every row of the local diagonal submatrix, and o_nz
3945    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3946    One way to choose d_nz and o_nz is to use the max nonzerors per local
3947    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3948    In this case, the values of d_nz,o_nz are:
3949 .vb
3950      proc0 : dnz = 2, o_nz = 2
3951      proc1 : dnz = 3, o_nz = 2
3952      proc2 : dnz = 1, o_nz = 4
3953 .ve
3954    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3955    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3956    for proc3. i.e we are using 12+15+10=37 storage locations to store
3957    34 values.
3958 
3959    When d_nnz, o_nnz parameters are specified, the storage is specified
3960    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3961    In the above case the values for d_nnz,o_nnz are:
3962 .vb
3963      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3964      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3965      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3966 .ve
3967    Here the space allocated is sum of all the above values i.e 34, and
3968    hence pre-allocation is perfect.
3969 
3970    Level: intermediate
3971 
3972 .keywords: matrix, aij, compressed row, sparse, parallel
3973 
3974 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3975           MPIAIJ, MatGetInfo()
3976 @*/
3977 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3978 {
3979   PetscErrorCode ierr;
3980 
3981   PetscFunctionBegin;
3982   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3983   PetscValidType(B,1);
3984   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3985   PetscFunctionReturn(0);
3986 }
3987 
3988 #undef __FUNCT__
3989 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3990 /*@
3991      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3992          CSR format the local rows.
3993 
3994    Collective on MPI_Comm
3995 
3996    Input Parameters:
3997 +  comm - MPI communicator
3998 .  m - number of local rows (Cannot be PETSC_DECIDE)
3999 .  n - This value should be the same as the local size used in creating the
4000        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4001        calculated if N is given) For square matrices n is almost always m.
4002 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4003 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4004 .   i - row indices
4005 .   j - column indices
4006 -   a - matrix values
4007 
4008    Output Parameter:
4009 .   mat - the matrix
4010 
4011    Level: intermediate
4012 
4013    Notes:
4014        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4015      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4016      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4017 
4018        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4019 
4020        The format which is used for the sparse matrix input, is equivalent to a
4021     row-major ordering.. i.e for the following matrix, the input data expected is
4022     as shown:
4023 
4024         1 0 0
4025         2 0 3     P0
4026        -------
4027         4 5 6     P1
4028 
4029      Process0 [P0]: rows_owned=[0,1]
4030         i =  {0,1,3}  [size = nrow+1  = 2+1]
4031         j =  {0,0,2}  [size = nz = 6]
4032         v =  {1,2,3}  [size = nz = 6]
4033 
4034      Process1 [P1]: rows_owned=[2]
4035         i =  {0,3}    [size = nrow+1  = 1+1]
4036         j =  {0,1,2}  [size = nz = 6]
4037         v =  {4,5,6}  [size = nz = 6]
4038 
4039 .keywords: matrix, aij, compressed row, sparse, parallel
4040 
4041 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4042           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4043 @*/
4044 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4045 {
4046   PetscErrorCode ierr;
4047 
4048   PetscFunctionBegin;
4049   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4050   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4051   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4052   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4053   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4054   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4055   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4056   PetscFunctionReturn(0);
4057 }
4058 
4059 #undef __FUNCT__
4060 #define __FUNCT__ "MatCreateAIJ"
4061 /*@C
4062    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4063    (the default parallel PETSc format).  For good matrix assembly performance
4064    the user should preallocate the matrix storage by setting the parameters
4065    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4066    performance can be increased by more than a factor of 50.
4067 
4068    Collective on MPI_Comm
4069 
4070    Input Parameters:
4071 +  comm - MPI communicator
4072 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4073            This value should be the same as the local size used in creating the
4074            y vector for the matrix-vector product y = Ax.
4075 .  n - This value should be the same as the local size used in creating the
4076        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4077        calculated if N is given) For square matrices n is almost always m.
4078 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4079 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4080 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4081            (same value is used for all local rows)
4082 .  d_nnz - array containing the number of nonzeros in the various rows of the
4083            DIAGONAL portion of the local submatrix (possibly different for each row)
4084            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4085            The size of this array is equal to the number of local rows, i.e 'm'.
4086 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4087            submatrix (same value is used for all local rows).
4088 -  o_nnz - array containing the number of nonzeros in the various rows of the
4089            OFF-DIAGONAL portion of the local submatrix (possibly different for
4090            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4091            structure. The size of this array is equal to the number
4092            of local rows, i.e 'm'.
4093 
4094    Output Parameter:
4095 .  A - the matrix
4096 
4097    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4098    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4099    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4100 
4101    Notes:
4102    If the *_nnz parameter is given then the *_nz parameter is ignored
4103 
4104    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4105    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4106    storage requirements for this matrix.
4107 
4108    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4109    processor than it must be used on all processors that share the object for
4110    that argument.
4111 
4112    The user MUST specify either the local or global matrix dimensions
4113    (possibly both).
4114 
4115    The parallel matrix is partitioned across processors such that the
4116    first m0 rows belong to process 0, the next m1 rows belong to
4117    process 1, the next m2 rows belong to process 2 etc.. where
4118    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4119    values corresponding to [m x N] submatrix.
4120 
4121    The columns are logically partitioned with the n0 columns belonging
4122    to 0th partition, the next n1 columns belonging to the next
4123    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4124 
4125    The DIAGONAL portion of the local submatrix on any given processor
4126    is the submatrix corresponding to the rows and columns m,n
4127    corresponding to the given processor. i.e diagonal matrix on
4128    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4129    etc. The remaining portion of the local submatrix [m x (N-n)]
4130    constitute the OFF-DIAGONAL portion. The example below better
4131    illustrates this concept.
4132 
4133    For a square global matrix we define each processor's diagonal portion
4134    to be its local rows and the corresponding columns (a square submatrix);
4135    each processor's off-diagonal portion encompasses the remainder of the
4136    local matrix (a rectangular submatrix).
4137 
4138    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4139 
4140    When calling this routine with a single process communicator, a matrix of
4141    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4142    type of communicator, use the construction mechanism:
4143      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4144 
4145    By default, this format uses inodes (identical nodes) when possible.
4146    We search for consecutive rows with the same nonzero structure, thereby
4147    reusing matrix information to achieve increased efficiency.
4148 
4149    Options Database Keys:
4150 +  -mat_no_inode  - Do not use inodes
4151 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4152 -  -mat_aij_oneindex - Internally use indexing starting at 1
4153         rather than 0.  Note that when calling MatSetValues(),
4154         the user still MUST index entries starting at 0!
4155 
4156 
4157    Example usage:
4158 
4159    Consider the following 8x8 matrix with 34 non-zero values, that is
4160    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4161    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4162    as follows:
4163 
4164 .vb
4165             1  2  0  |  0  3  0  |  0  4
4166     Proc0   0  5  6  |  7  0  0  |  8  0
4167             9  0 10  | 11  0  0  | 12  0
4168     -------------------------------------
4169            13  0 14  | 15 16 17  |  0  0
4170     Proc1   0 18  0  | 19 20 21  |  0  0
4171             0  0  0  | 22 23  0  | 24  0
4172     -------------------------------------
4173     Proc2  25 26 27  |  0  0 28  | 29  0
4174            30  0  0  | 31 32 33  |  0 34
4175 .ve
4176 
4177    This can be represented as a collection of submatrices as:
4178 
4179 .vb
4180       A B C
4181       D E F
4182       G H I
4183 .ve
4184 
4185    Where the submatrices A,B,C are owned by proc0, D,E,F are
4186    owned by proc1, G,H,I are owned by proc2.
4187 
4188    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4189    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4190    The 'M','N' parameters are 8,8, and have the same values on all procs.
4191 
4192    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4193    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4194    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4195    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4196    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4197    matrix, ans [DF] as another SeqAIJ matrix.
4198 
4199    When d_nz, o_nz parameters are specified, d_nz storage elements are
4200    allocated for every row of the local diagonal submatrix, and o_nz
4201    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4202    One way to choose d_nz and o_nz is to use the max nonzerors per local
4203    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4204    In this case, the values of d_nz,o_nz are:
4205 .vb
4206      proc0 : dnz = 2, o_nz = 2
4207      proc1 : dnz = 3, o_nz = 2
4208      proc2 : dnz = 1, o_nz = 4
4209 .ve
4210    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4211    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4212    for proc3. i.e we are using 12+15+10=37 storage locations to store
4213    34 values.
4214 
4215    When d_nnz, o_nnz parameters are specified, the storage is specified
4216    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4217    In the above case the values for d_nnz,o_nnz are:
4218 .vb
4219      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4220      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4221      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4222 .ve
4223    Here the space allocated is sum of all the above values i.e 34, and
4224    hence pre-allocation is perfect.
4225 
4226    Level: intermediate
4227 
4228 .keywords: matrix, aij, compressed row, sparse, parallel
4229 
4230 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4231           MPIAIJ, MatCreateMPIAIJWithArrays()
4232 @*/
4233 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4234 {
4235   PetscErrorCode ierr;
4236   PetscMPIInt    size;
4237 
4238   PetscFunctionBegin;
4239   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4240   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4241   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4242   if (size > 1) {
4243     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4244     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4245   } else {
4246     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4247     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4248   }
4249   PetscFunctionReturn(0);
4250 }
4251 
4252 #undef __FUNCT__
4253 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4254 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4255 {
4256   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4257 
4258   PetscFunctionBegin;
4259   *Ad     = a->A;
4260   *Ao     = a->B;
4261   *colmap = a->garray;
4262   PetscFunctionReturn(0);
4263 }
4264 
4265 #undef __FUNCT__
4266 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4267 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4268 {
4269   PetscErrorCode ierr;
4270   PetscInt       i;
4271   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4272 
4273   PetscFunctionBegin;
4274   if (coloring->ctype == IS_COLORING_GLOBAL) {
4275     ISColoringValue *allcolors,*colors;
4276     ISColoring      ocoloring;
4277 
4278     /* set coloring for diagonal portion */
4279     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4280 
4281     /* set coloring for off-diagonal portion */
4282     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4283     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4284     for (i=0; i<a->B->cmap->n; i++) {
4285       colors[i] = allcolors[a->garray[i]];
4286     }
4287     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4288     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4289     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4290     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4291   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4292     ISColoringValue *colors;
4293     PetscInt        *larray;
4294     ISColoring      ocoloring;
4295 
4296     /* set coloring for diagonal portion */
4297     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4298     for (i=0; i<a->A->cmap->n; i++) {
4299       larray[i] = i + A->cmap->rstart;
4300     }
4301     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4302     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4303     for (i=0; i<a->A->cmap->n; i++) {
4304       colors[i] = coloring->colors[larray[i]];
4305     }
4306     ierr = PetscFree(larray);CHKERRQ(ierr);
4307     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4308     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4309     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4310 
4311     /* set coloring for off-diagonal portion */
4312     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4313     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4314     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4315     for (i=0; i<a->B->cmap->n; i++) {
4316       colors[i] = coloring->colors[larray[i]];
4317     }
4318     ierr = PetscFree(larray);CHKERRQ(ierr);
4319     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4320     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4321     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4322   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4323 
4324   PetscFunctionReturn(0);
4325 }
4326 
4327 #if defined(PETSC_HAVE_ADIC)
4328 #undef __FUNCT__
4329 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4330 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4331 {
4332   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4333   PetscErrorCode ierr;
4334 
4335   PetscFunctionBegin;
4336   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4337   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4338   PetscFunctionReturn(0);
4339 }
4340 #endif
4341 
4342 #undef __FUNCT__
4343 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4344 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4345 {
4346   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4347   PetscErrorCode ierr;
4348 
4349   PetscFunctionBegin;
4350   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4351   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4352   PetscFunctionReturn(0);
4353 }
4354 
4355 #undef __FUNCT__
4356 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4357 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4358 {
4359   PetscErrorCode ierr;
4360   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4361   PetscInt       *indx;
4362 
4363   PetscFunctionBegin;
4364   /* This routine will ONLY return MPIAIJ type matrix */
4365   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4366   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4367   if (n == PETSC_DECIDE){
4368     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4369   }
4370   /* Check sum(n) = N */
4371   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4372   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4373 
4374   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4375   rstart -= m;
4376 
4377   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4378   for (i=0;i<m;i++) {
4379     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4380     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4381     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4382   }
4383 
4384   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4385   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4386   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4387   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4388   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4389   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4390   PetscFunctionReturn(0);
4391 }
4392 
4393 #undef __FUNCT__
4394 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4395 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4396 {
4397   PetscErrorCode ierr;
4398   PetscInt       m,N,i,rstart,nnz,Ii;
4399   PetscInt       *indx;
4400   PetscScalar    *values;
4401 
4402   PetscFunctionBegin;
4403   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4404   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4405   for (i=0;i<m;i++) {
4406     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4407     Ii    = i + rstart;
4408     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4409     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4410   }
4411   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4412   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4413   PetscFunctionReturn(0);
4414 }
4415 
4416 #undef __FUNCT__
4417 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4418 /*@
4419       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4420                  matrices from each processor
4421 
4422     Collective on MPI_Comm
4423 
4424    Input Parameters:
4425 +    comm - the communicators the parallel matrix will live on
4426 .    inmat - the input sequential matrices
4427 .    n - number of local columns (or PETSC_DECIDE)
4428 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4429 
4430    Output Parameter:
4431 .    outmat - the parallel matrix generated
4432 
4433     Level: advanced
4434 
4435    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4436 
4437 @*/
4438 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4439 {
4440   PetscErrorCode ierr;
4441 
4442   PetscFunctionBegin;
4443   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4444   if (scall == MAT_INITIAL_MATRIX){
4445     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4446   }
4447   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4448   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4449   PetscFunctionReturn(0);
4450 }
4451 
4452 #undef __FUNCT__
4453 #define __FUNCT__ "MatFileSplit"
4454 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4455 {
4456   PetscErrorCode    ierr;
4457   PetscMPIInt       rank;
4458   PetscInt          m,N,i,rstart,nnz;
4459   size_t            len;
4460   const PetscInt    *indx;
4461   PetscViewer       out;
4462   char              *name;
4463   Mat               B;
4464   const PetscScalar *values;
4465 
4466   PetscFunctionBegin;
4467   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4468   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4469   /* Should this be the type of the diagonal block of A? */
4470   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4471   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4472   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4473   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4474   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4475   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4476   for (i=0;i<m;i++) {
4477     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4478     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4479     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4480   }
4481   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4482   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4483 
4484   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4485   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4486   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4487   sprintf(name,"%s.%d",outfile,rank);
4488   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4489   ierr = PetscFree(name);
4490   ierr = MatView(B,out);CHKERRQ(ierr);
4491   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4492   ierr = MatDestroy(&B);CHKERRQ(ierr);
4493   PetscFunctionReturn(0);
4494 }
4495 
4496 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4497 #undef __FUNCT__
4498 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4499 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4500 {
4501   PetscErrorCode       ierr;
4502   Mat_Merge_SeqsToMPI  *merge;
4503   PetscContainer       container;
4504 
4505   PetscFunctionBegin;
4506   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4507   if (container) {
4508     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4509     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4510     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4511     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4512     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4513     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4514     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4515     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4516     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4517     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4518     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4519     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4520     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4521     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4522     ierr = PetscFree(merge);CHKERRQ(ierr);
4523     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4524   }
4525   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4526   PetscFunctionReturn(0);
4527 }
4528 
4529 #include <../src/mat/utils/freespace.h>
4530 #include <petscbt.h>
4531 
4532 #undef __FUNCT__
4533 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4534 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4535 {
4536   PetscErrorCode       ierr;
4537   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4538   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4539   PetscMPIInt          size,rank,taga,*len_s;
4540   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4541   PetscInt             proc,m;
4542   PetscInt             **buf_ri,**buf_rj;
4543   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4544   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4545   MPI_Request          *s_waits,*r_waits;
4546   MPI_Status           *status;
4547   MatScalar            *aa=a->a;
4548   MatScalar            **abuf_r,*ba_i;
4549   Mat_Merge_SeqsToMPI  *merge;
4550   PetscContainer       container;
4551 
4552   PetscFunctionBegin;
4553   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4554 
4555   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4556   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4557 
4558   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4559   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4560 
4561   bi     = merge->bi;
4562   bj     = merge->bj;
4563   buf_ri = merge->buf_ri;
4564   buf_rj = merge->buf_rj;
4565 
4566   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4567   owners = merge->rowmap->range;
4568   len_s  = merge->len_s;
4569 
4570   /* send and recv matrix values */
4571   /*-----------------------------*/
4572   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4573   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4574 
4575   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4576   for (proc=0,k=0; proc<size; proc++){
4577     if (!len_s[proc]) continue;
4578     i = owners[proc];
4579     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4580     k++;
4581   }
4582 
4583   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4584   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4585   ierr = PetscFree(status);CHKERRQ(ierr);
4586 
4587   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4588   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4589 
4590   /* insert mat values of mpimat */
4591   /*----------------------------*/
4592   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4593   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4594 
4595   for (k=0; k<merge->nrecv; k++){
4596     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4597     nrows = *(buf_ri_k[k]);
4598     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4599     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4600   }
4601 
4602   /* set values of ba */
4603   m = merge->rowmap->n;
4604   for (i=0; i<m; i++) {
4605     arow = owners[rank] + i;
4606     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4607     bnzi = bi[i+1] - bi[i];
4608     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4609 
4610     /* add local non-zero vals of this proc's seqmat into ba */
4611     anzi = ai[arow+1] - ai[arow];
4612     aj   = a->j + ai[arow];
4613     aa   = a->a + ai[arow];
4614     nextaj = 0;
4615     for (j=0; nextaj<anzi; j++){
4616       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4617         ba_i[j] += aa[nextaj++];
4618       }
4619     }
4620 
4621     /* add received vals into ba */
4622     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4623       /* i-th row */
4624       if (i == *nextrow[k]) {
4625         anzi = *(nextai[k]+1) - *nextai[k];
4626         aj   = buf_rj[k] + *(nextai[k]);
4627         aa   = abuf_r[k] + *(nextai[k]);
4628         nextaj = 0;
4629         for (j=0; nextaj<anzi; j++){
4630           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4631             ba_i[j] += aa[nextaj++];
4632           }
4633         }
4634         nextrow[k]++; nextai[k]++;
4635       }
4636     }
4637     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4638   }
4639   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4640   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4641 
4642   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4643   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4644   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4645   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4646   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4647   PetscFunctionReturn(0);
4648 }
4649 
4650 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4651 
4652 #undef __FUNCT__
4653 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4654 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4655 {
4656   PetscErrorCode       ierr;
4657   Mat                  B_mpi;
4658   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4659   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4660   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4661   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4662   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4663   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4664   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4665   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4666   MPI_Status           *status;
4667   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4668   PetscBT              lnkbt;
4669   Mat_Merge_SeqsToMPI  *merge;
4670   PetscContainer       container;
4671 
4672   PetscFunctionBegin;
4673   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4674 
4675   /* make sure it is a PETSc comm */
4676   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4677   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4678   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4679 
4680   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4681   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4682 
4683   /* determine row ownership */
4684   /*---------------------------------------------------------*/
4685   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4686   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4687   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4688   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4689   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4690   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4691   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4692 
4693   m      = merge->rowmap->n;
4694   M      = merge->rowmap->N;
4695   owners = merge->rowmap->range;
4696 
4697   /* determine the number of messages to send, their lengths */
4698   /*---------------------------------------------------------*/
4699   len_s  = merge->len_s;
4700 
4701   len = 0;  /* length of buf_si[] */
4702   merge->nsend = 0;
4703   for (proc=0; proc<size; proc++){
4704     len_si[proc] = 0;
4705     if (proc == rank){
4706       len_s[proc] = 0;
4707     } else {
4708       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4709       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4710     }
4711     if (len_s[proc]) {
4712       merge->nsend++;
4713       nrows = 0;
4714       for (i=owners[proc]; i<owners[proc+1]; i++){
4715         if (ai[i+1] > ai[i]) nrows++;
4716       }
4717       len_si[proc] = 2*(nrows+1);
4718       len += len_si[proc];
4719     }
4720   }
4721 
4722   /* determine the number and length of messages to receive for ij-structure */
4723   /*-------------------------------------------------------------------------*/
4724   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4725   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4726 
4727   /* post the Irecv of j-structure */
4728   /*-------------------------------*/
4729   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4730   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4731 
4732   /* post the Isend of j-structure */
4733   /*--------------------------------*/
4734   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4735 
4736   for (proc=0, k=0; proc<size; proc++){
4737     if (!len_s[proc]) continue;
4738     i = owners[proc];
4739     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4740     k++;
4741   }
4742 
4743   /* receives and sends of j-structure are complete */
4744   /*------------------------------------------------*/
4745   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4746   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4747 
4748   /* send and recv i-structure */
4749   /*---------------------------*/
4750   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4751   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4752 
4753   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4754   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4755   for (proc=0,k=0; proc<size; proc++){
4756     if (!len_s[proc]) continue;
4757     /* form outgoing message for i-structure:
4758          buf_si[0]:                 nrows to be sent
4759                [1:nrows]:           row index (global)
4760                [nrows+1:2*nrows+1]: i-structure index
4761     */
4762     /*-------------------------------------------*/
4763     nrows = len_si[proc]/2 - 1;
4764     buf_si_i    = buf_si + nrows+1;
4765     buf_si[0]   = nrows;
4766     buf_si_i[0] = 0;
4767     nrows = 0;
4768     for (i=owners[proc]; i<owners[proc+1]; i++){
4769       anzi = ai[i+1] - ai[i];
4770       if (anzi) {
4771         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4772         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4773         nrows++;
4774       }
4775     }
4776     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4777     k++;
4778     buf_si += len_si[proc];
4779   }
4780 
4781   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4782   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4783 
4784   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4785   for (i=0; i<merge->nrecv; i++){
4786     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4787   }
4788 
4789   ierr = PetscFree(len_si);CHKERRQ(ierr);
4790   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4791   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4792   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4793   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4794   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4795   ierr = PetscFree(status);CHKERRQ(ierr);
4796 
4797   /* compute a local seq matrix in each processor */
4798   /*----------------------------------------------*/
4799   /* allocate bi array and free space for accumulating nonzero column info */
4800   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4801   bi[0] = 0;
4802 
4803   /* create and initialize a linked list */
4804   nlnk = N+1;
4805   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4806 
4807   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4808   len = 0;
4809   len  = ai[owners[rank+1]] - ai[owners[rank]];
4810   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4811   current_space = free_space;
4812 
4813   /* determine symbolic info for each local row */
4814   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4815 
4816   for (k=0; k<merge->nrecv; k++){
4817     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4818     nrows = *buf_ri_k[k];
4819     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4820     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4821   }
4822 
4823   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4824   len = 0;
4825   for (i=0;i<m;i++) {
4826     bnzi   = 0;
4827     /* add local non-zero cols of this proc's seqmat into lnk */
4828     arow   = owners[rank] + i;
4829     anzi   = ai[arow+1] - ai[arow];
4830     aj     = a->j + ai[arow];
4831     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4832     bnzi += nlnk;
4833     /* add received col data into lnk */
4834     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4835       if (i == *nextrow[k]) { /* i-th row */
4836         anzi = *(nextai[k]+1) - *nextai[k];
4837         aj   = buf_rj[k] + *nextai[k];
4838         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4839         bnzi += nlnk;
4840         nextrow[k]++; nextai[k]++;
4841       }
4842     }
4843     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4844 
4845     /* if free space is not available, make more free space */
4846     if (current_space->local_remaining<bnzi) {
4847       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4848       nspacedouble++;
4849     }
4850     /* copy data into free space, then initialize lnk */
4851     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4852     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4853 
4854     current_space->array           += bnzi;
4855     current_space->local_used      += bnzi;
4856     current_space->local_remaining -= bnzi;
4857 
4858     bi[i+1] = bi[i] + bnzi;
4859   }
4860 
4861   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4862 
4863   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4864   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4865   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4866 
4867   /* create symbolic parallel matrix B_mpi */
4868   /*---------------------------------------*/
4869     ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4870   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4871   if (n==PETSC_DECIDE) {
4872     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4873   } else {
4874     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4875   }
4876   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4877   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4878   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4879   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4880   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4881 
4882   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4883   B_mpi->assembled     = PETSC_FALSE;
4884   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4885   merge->bi            = bi;
4886   merge->bj            = bj;
4887   merge->buf_ri        = buf_ri;
4888   merge->buf_rj        = buf_rj;
4889   merge->coi           = PETSC_NULL;
4890   merge->coj           = PETSC_NULL;
4891   merge->owners_co     = PETSC_NULL;
4892 
4893   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4894 
4895   /* attach the supporting struct to B_mpi for reuse */
4896   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4897   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4898   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4899   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4900   *mpimat = B_mpi;
4901 
4902   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4903   PetscFunctionReturn(0);
4904 }
4905 
4906 #undef __FUNCT__
4907 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4908 /*@C
4909       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4910                  matrices from each processor
4911 
4912     Collective on MPI_Comm
4913 
4914    Input Parameters:
4915 +    comm - the communicators the parallel matrix will live on
4916 .    seqmat - the input sequential matrices
4917 .    m - number of local rows (or PETSC_DECIDE)
4918 .    n - number of local columns (or PETSC_DECIDE)
4919 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4920 
4921    Output Parameter:
4922 .    mpimat - the parallel matrix generated
4923 
4924     Level: advanced
4925 
4926    Notes:
4927      The dimensions of the sequential matrix in each processor MUST be the same.
4928      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4929      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4930 @*/
4931 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4932 {
4933   PetscErrorCode   ierr;
4934   PetscMPIInt     size;
4935 
4936   PetscFunctionBegin;
4937   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4938   if (size == 1){
4939      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4940      if (scall == MAT_INITIAL_MATRIX){
4941        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4942      } else {
4943        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4944      }
4945      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4946      PetscFunctionReturn(0);
4947   }
4948   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4949   if (scall == MAT_INITIAL_MATRIX){
4950     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4951   }
4952   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4953   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4954   PetscFunctionReturn(0);
4955 }
4956 
4957 #undef __FUNCT__
4958 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4959 /*@
4960      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4961           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4962           with MatGetSize()
4963 
4964     Not Collective
4965 
4966    Input Parameters:
4967 +    A - the matrix
4968 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4969 
4970    Output Parameter:
4971 .    A_loc - the local sequential matrix generated
4972 
4973     Level: developer
4974 
4975 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4976 
4977 @*/
4978 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4979 {
4980   PetscErrorCode  ierr;
4981   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4982   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4983   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4984   MatScalar       *aa=a->a,*ba=b->a,*cam;
4985   PetscScalar     *ca;
4986   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4987   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4988   PetscBool       match;
4989 
4990   PetscFunctionBegin;
4991   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4992   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4993   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4994   if (scall == MAT_INITIAL_MATRIX){
4995     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4996     ci[0] = 0;
4997     for (i=0; i<am; i++){
4998       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4999     }
5000     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5001     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5002     k = 0;
5003     for (i=0; i<am; i++) {
5004       ncols_o = bi[i+1] - bi[i];
5005       ncols_d = ai[i+1] - ai[i];
5006       /* off-diagonal portion of A */
5007       for (jo=0; jo<ncols_o; jo++) {
5008         col = cmap[*bj];
5009         if (col >= cstart) break;
5010         cj[k]   = col; bj++;
5011         ca[k++] = *ba++;
5012       }
5013       /* diagonal portion of A */
5014       for (j=0; j<ncols_d; j++) {
5015         cj[k]   = cstart + *aj++;
5016         ca[k++] = *aa++;
5017       }
5018       /* off-diagonal portion of A */
5019       for (j=jo; j<ncols_o; j++) {
5020         cj[k]   = cmap[*bj++];
5021         ca[k++] = *ba++;
5022       }
5023     }
5024     /* put together the new matrix */
5025     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5026     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5027     /* Since these are PETSc arrays, change flags to free them as necessary. */
5028     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5029     mat->free_a  = PETSC_TRUE;
5030     mat->free_ij = PETSC_TRUE;
5031     mat->nonew   = 0;
5032   } else if (scall == MAT_REUSE_MATRIX){
5033     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5034     ci = mat->i; cj = mat->j; cam = mat->a;
5035     for (i=0; i<am; i++) {
5036       /* off-diagonal portion of A */
5037       ncols_o = bi[i+1] - bi[i];
5038       for (jo=0; jo<ncols_o; jo++) {
5039         col = cmap[*bj];
5040         if (col >= cstart) break;
5041         *cam++ = *ba++; bj++;
5042       }
5043       /* diagonal portion of A */
5044       ncols_d = ai[i+1] - ai[i];
5045       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5046       /* off-diagonal portion of A */
5047       for (j=jo; j<ncols_o; j++) {
5048         *cam++ = *ba++; bj++;
5049       }
5050     }
5051   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5052   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5053   PetscFunctionReturn(0);
5054 }
5055 
5056 #undef __FUNCT__
5057 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5058 /*@C
5059      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5060 
5061     Not Collective
5062 
5063    Input Parameters:
5064 +    A - the matrix
5065 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5066 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5067 
5068    Output Parameter:
5069 .    A_loc - the local sequential matrix generated
5070 
5071     Level: developer
5072 
5073 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5074 
5075 @*/
5076 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5077 {
5078   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5079   PetscErrorCode    ierr;
5080   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5081   IS                isrowa,iscola;
5082   Mat               *aloc;
5083   PetscBool       match;
5084 
5085   PetscFunctionBegin;
5086   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5087   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5088   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5089   if (!row){
5090     start = A->rmap->rstart; end = A->rmap->rend;
5091     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5092   } else {
5093     isrowa = *row;
5094   }
5095   if (!col){
5096     start = A->cmap->rstart;
5097     cmap  = a->garray;
5098     nzA   = a->A->cmap->n;
5099     nzB   = a->B->cmap->n;
5100     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5101     ncols = 0;
5102     for (i=0; i<nzB; i++) {
5103       if (cmap[i] < start) idx[ncols++] = cmap[i];
5104       else break;
5105     }
5106     imark = i;
5107     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5108     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5109     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5110   } else {
5111     iscola = *col;
5112   }
5113   if (scall != MAT_INITIAL_MATRIX){
5114     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5115     aloc[0] = *A_loc;
5116   }
5117   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5118   *A_loc = aloc[0];
5119   ierr = PetscFree(aloc);CHKERRQ(ierr);
5120   if (!row){
5121     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5122   }
5123   if (!col){
5124     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5125   }
5126   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5127   PetscFunctionReturn(0);
5128 }
5129 
5130 #undef __FUNCT__
5131 #define __FUNCT__ "MatGetBrowsOfAcols"
5132 /*@C
5133     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5134 
5135     Collective on Mat
5136 
5137    Input Parameters:
5138 +    A,B - the matrices in mpiaij format
5139 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5140 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5141 
5142    Output Parameter:
5143 +    rowb, colb - index sets of rows and columns of B to extract
5144 -    B_seq - the sequential matrix generated
5145 
5146     Level: developer
5147 
5148 @*/
5149 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5150 {
5151   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5152   PetscErrorCode    ierr;
5153   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5154   IS                isrowb,iscolb;
5155   Mat               *bseq=PETSC_NULL;
5156 
5157   PetscFunctionBegin;
5158   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5159     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5160   }
5161   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5162 
5163   if (scall == MAT_INITIAL_MATRIX){
5164     start = A->cmap->rstart;
5165     cmap  = a->garray;
5166     nzA   = a->A->cmap->n;
5167     nzB   = a->B->cmap->n;
5168     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5169     ncols = 0;
5170     for (i=0; i<nzB; i++) {  /* row < local row index */
5171       if (cmap[i] < start) idx[ncols++] = cmap[i];
5172       else break;
5173     }
5174     imark = i;
5175     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5176     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5177     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5178     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5179   } else {
5180     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5181     isrowb = *rowb; iscolb = *colb;
5182     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5183     bseq[0] = *B_seq;
5184   }
5185   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5186   *B_seq = bseq[0];
5187   ierr = PetscFree(bseq);CHKERRQ(ierr);
5188   if (!rowb){
5189     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5190   } else {
5191     *rowb = isrowb;
5192   }
5193   if (!colb){
5194     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5195   } else {
5196     *colb = iscolb;
5197   }
5198   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5199   PetscFunctionReturn(0);
5200 }
5201 
5202 #undef __FUNCT__
5203 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5204 /*
5205     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5206     of the OFF-DIAGONAL portion of local A
5207 
5208     Collective on Mat
5209 
5210    Input Parameters:
5211 +    A,B - the matrices in mpiaij format
5212 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5213 
5214    Output Parameter:
5215 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5216 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5217 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5218 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5219 
5220     Level: developer
5221 
5222 */
5223 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5224 {
5225   VecScatter_MPI_General *gen_to,*gen_from;
5226   PetscErrorCode         ierr;
5227   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5228   Mat_SeqAIJ             *b_oth;
5229   VecScatter             ctx=a->Mvctx;
5230   MPI_Comm               comm=((PetscObject)ctx)->comm;
5231   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5232   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5233   PetscScalar            *rvalues,*svalues;
5234   MatScalar              *b_otha,*bufa,*bufA;
5235   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5236   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5237   MPI_Status             *sstatus,rstatus;
5238   PetscMPIInt            jj;
5239   PetscInt               *cols,sbs,rbs;
5240   PetscScalar            *vals;
5241 
5242   PetscFunctionBegin;
5243   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5244     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5245   }
5246   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5247   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5248 
5249   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5250   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5251   rvalues  = gen_from->values; /* holds the length of receiving row */
5252   svalues  = gen_to->values;   /* holds the length of sending row */
5253   nrecvs   = gen_from->n;
5254   nsends   = gen_to->n;
5255 
5256   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5257   srow     = gen_to->indices;   /* local row index to be sent */
5258   sstarts  = gen_to->starts;
5259   sprocs   = gen_to->procs;
5260   sstatus  = gen_to->sstatus;
5261   sbs      = gen_to->bs;
5262   rstarts  = gen_from->starts;
5263   rprocs   = gen_from->procs;
5264   rbs      = gen_from->bs;
5265 
5266   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5267   if (scall == MAT_INITIAL_MATRIX){
5268     /* i-array */
5269     /*---------*/
5270     /*  post receives */
5271     for (i=0; i<nrecvs; i++){
5272       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5273       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5274       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5275     }
5276 
5277     /* pack the outgoing message */
5278     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5279     sstartsj[0] = 0;  rstartsj[0] = 0;
5280     len = 0; /* total length of j or a array to be sent */
5281     k = 0;
5282     for (i=0; i<nsends; i++){
5283       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5284       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5285       for (j=0; j<nrows; j++) {
5286         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5287         for (l=0; l<sbs; l++){
5288           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5289           rowlen[j*sbs+l] = ncols;
5290           len += ncols;
5291           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5292         }
5293         k++;
5294       }
5295       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5296       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5297     }
5298     /* recvs and sends of i-array are completed */
5299     i = nrecvs;
5300     while (i--) {
5301       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5302     }
5303     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5304 
5305     /* allocate buffers for sending j and a arrays */
5306     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5307     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5308 
5309     /* create i-array of B_oth */
5310     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5311     b_othi[0] = 0;
5312     len = 0; /* total length of j or a array to be received */
5313     k = 0;
5314     for (i=0; i<nrecvs; i++){
5315       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5316       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5317       for (j=0; j<nrows; j++) {
5318         b_othi[k+1] = b_othi[k] + rowlen[j];
5319         len += rowlen[j]; k++;
5320       }
5321       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5322     }
5323 
5324     /* allocate space for j and a arrrays of B_oth */
5325     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5326     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5327 
5328     /* j-array */
5329     /*---------*/
5330     /*  post receives of j-array */
5331     for (i=0; i<nrecvs; i++){
5332       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5333       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5334     }
5335 
5336     /* pack the outgoing message j-array */
5337     k = 0;
5338     for (i=0; i<nsends; i++){
5339       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5340       bufJ = bufj+sstartsj[i];
5341       for (j=0; j<nrows; j++) {
5342         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5343         for (ll=0; ll<sbs; ll++){
5344           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5345           for (l=0; l<ncols; l++){
5346             *bufJ++ = cols[l];
5347           }
5348           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5349         }
5350       }
5351       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5352     }
5353 
5354     /* recvs and sends of j-array are completed */
5355     i = nrecvs;
5356     while (i--) {
5357       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5358     }
5359     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5360   } else if (scall == MAT_REUSE_MATRIX){
5361     sstartsj = *startsj_s;
5362     rstartsj = *startsj_r;
5363     bufa     = *bufa_ptr;
5364     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5365     b_otha   = b_oth->a;
5366   } else {
5367     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5368   }
5369 
5370   /* a-array */
5371   /*---------*/
5372   /*  post receives of a-array */
5373   for (i=0; i<nrecvs; i++){
5374     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5375     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5376   }
5377 
5378   /* pack the outgoing message a-array */
5379   k = 0;
5380   for (i=0; i<nsends; i++){
5381     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5382     bufA = bufa+sstartsj[i];
5383     for (j=0; j<nrows; j++) {
5384       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5385       for (ll=0; ll<sbs; ll++){
5386         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5387         for (l=0; l<ncols; l++){
5388           *bufA++ = vals[l];
5389         }
5390         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5391       }
5392     }
5393     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5394   }
5395   /* recvs and sends of a-array are completed */
5396   i = nrecvs;
5397   while (i--) {
5398     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5399   }
5400   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5401   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5402 
5403   if (scall == MAT_INITIAL_MATRIX){
5404     /* put together the new matrix */
5405     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5406 
5407     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5408     /* Since these are PETSc arrays, change flags to free them as necessary. */
5409     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5410     b_oth->free_a  = PETSC_TRUE;
5411     b_oth->free_ij = PETSC_TRUE;
5412     b_oth->nonew   = 0;
5413 
5414     ierr = PetscFree(bufj);CHKERRQ(ierr);
5415     if (!startsj_s || !bufa_ptr){
5416       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5417       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5418     } else {
5419       *startsj_s = sstartsj;
5420       *startsj_r = rstartsj;
5421       *bufa_ptr  = bufa;
5422     }
5423   }
5424   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5425   PetscFunctionReturn(0);
5426 }
5427 
5428 #undef __FUNCT__
5429 #define __FUNCT__ "MatGetCommunicationStructs"
5430 /*@C
5431   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5432 
5433   Not Collective
5434 
5435   Input Parameters:
5436 . A - The matrix in mpiaij format
5437 
5438   Output Parameter:
5439 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5440 . colmap - A map from global column index to local index into lvec
5441 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5442 
5443   Level: developer
5444 
5445 @*/
5446 #if defined (PETSC_USE_CTABLE)
5447 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5448 #else
5449 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5450 #endif
5451 {
5452   Mat_MPIAIJ *a;
5453 
5454   PetscFunctionBegin;
5455   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5456   PetscValidPointer(lvec, 2);
5457   PetscValidPointer(colmap, 3);
5458   PetscValidPointer(multScatter, 4);
5459   a = (Mat_MPIAIJ *) A->data;
5460   if (lvec) *lvec = a->lvec;
5461   if (colmap) *colmap = a->colmap;
5462   if (multScatter) *multScatter = a->Mvctx;
5463   PetscFunctionReturn(0);
5464 }
5465 
5466 EXTERN_C_BEGIN
5467 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5468 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5469 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5470 EXTERN_C_END
5471 
5472 #undef __FUNCT__
5473 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5474 /*
5475     Computes (B'*A')' since computing B*A directly is untenable
5476 
5477                n                       p                          p
5478         (              )       (              )         (                  )
5479       m (      A       )  *  n (       B      )   =   m (         C        )
5480         (              )       (              )         (                  )
5481 
5482 */
5483 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5484 {
5485   PetscErrorCode     ierr;
5486   Mat                At,Bt,Ct;
5487 
5488   PetscFunctionBegin;
5489   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5490   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5491   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5492   ierr = MatDestroy(&At);CHKERRQ(ierr);
5493   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5494   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5495   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5496   PetscFunctionReturn(0);
5497 }
5498 
5499 #undef __FUNCT__
5500 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5501 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5502 {
5503   PetscErrorCode ierr;
5504   PetscInt       m=A->rmap->n,n=B->cmap->n;
5505   Mat            Cmat;
5506 
5507   PetscFunctionBegin;
5508   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5509   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5510   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5511   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5512   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5513   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5514   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5515   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5516   *C   = Cmat;
5517   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5518   PetscFunctionReturn(0);
5519 }
5520 
5521 /* ----------------------------------------------------------------*/
5522 #undef __FUNCT__
5523 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5524 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5525 {
5526   PetscErrorCode ierr;
5527 
5528   PetscFunctionBegin;
5529   if (scall == MAT_INITIAL_MATRIX){
5530     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5531   }
5532   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5533   PetscFunctionReturn(0);
5534 }
5535 
5536 EXTERN_C_BEGIN
5537 #if defined(PETSC_HAVE_MUMPS)
5538 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5539 #endif
5540 #if defined(PETSC_HAVE_PASTIX)
5541 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5542 #endif
5543 #if defined(PETSC_HAVE_SUPERLU_DIST)
5544 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5545 #endif
5546 #if defined(PETSC_HAVE_SPOOLES)
5547 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5548 #endif
5549 #if defined(PETSC_HAVE_CLIQUE)
5550 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5551 #endif
5552 EXTERN_C_END
5553 
5554 /*MC
5555    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5556 
5557    Options Database Keys:
5558 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5559 
5560   Level: beginner
5561 
5562 .seealso: MatCreateAIJ()
5563 M*/
5564 
5565 EXTERN_C_BEGIN
5566 #undef __FUNCT__
5567 #define __FUNCT__ "MatCreate_MPIAIJ"
5568 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5569 {
5570   Mat_MPIAIJ     *b;
5571   PetscErrorCode ierr;
5572   PetscMPIInt    size;
5573 
5574   PetscFunctionBegin;
5575   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5576   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5577   B->data         = (void*)b;
5578   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5579   B->assembled    = PETSC_FALSE;
5580   B->insertmode   = NOT_SET_VALUES;
5581   b->size         = size;
5582   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5583 
5584   /* build cache for off array entries formed */
5585   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5586   b->donotstash  = PETSC_FALSE;
5587   b->colmap      = 0;
5588   b->garray      = 0;
5589   b->roworiented = PETSC_TRUE;
5590 
5591   /* stuff used for matrix vector multiply */
5592   b->lvec      = PETSC_NULL;
5593   b->Mvctx     = PETSC_NULL;
5594 
5595   /* stuff for MatGetRow() */
5596   b->rowindices   = 0;
5597   b->rowvalues    = 0;
5598   b->getrowactive = PETSC_FALSE;
5599 
5600   /* flexible pointer used in CUSP/CUSPARSE classes */
5601   b->spptr        = PETSC_NULL;
5602 #if defined(PETSC_HAVE_SPOOLES)
5603   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5604                                      "MatGetFactor_mpiaij_spooles",
5605                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5606 #endif
5607 #if defined(PETSC_HAVE_MUMPS)
5608   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5609                                      "MatGetFactor_aij_mumps",
5610                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5611 #endif
5612 #if defined(PETSC_HAVE_PASTIX)
5613   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5614 					   "MatGetFactor_mpiaij_pastix",
5615 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5616 #endif
5617 #if defined(PETSC_HAVE_SUPERLU_DIST)
5618   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5619                                      "MatGetFactor_mpiaij_superlu_dist",
5620                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5621 #endif
5622 #if defined(PETSC_HAVE_CLIQUE)
5623   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5624                                      "MatGetFactor_aij_clique",
5625                                      MatGetFactor_aij_clique);CHKERRQ(ierr);
5626 #endif
5627   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5628                                      "MatStoreValues_MPIAIJ",
5629                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5630   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5631                                      "MatRetrieveValues_MPIAIJ",
5632                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5633   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5634 				     "MatGetDiagonalBlock_MPIAIJ",
5635                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5636   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5637 				     "MatIsTranspose_MPIAIJ",
5638 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5639   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5640 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5641 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5642   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5643 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5644 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5645   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5646 				     "MatDiagonalScaleLocal_MPIAIJ",
5647 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5648   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5649                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5650                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5651   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5652                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5653                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5654   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5655                                      "MatConvert_MPIAIJ_MPISBAIJ",
5656                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5657   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5658                                      "MatMatMult_MPIDense_MPIAIJ",
5659                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5660   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5661                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5662                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5663   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5664                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5665                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5666   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5667   PetscFunctionReturn(0);
5668 }
5669 EXTERN_C_END
5670 
5671 #undef __FUNCT__
5672 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5673 /*@
5674      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5675          and "off-diagonal" part of the matrix in CSR format.
5676 
5677    Collective on MPI_Comm
5678 
5679    Input Parameters:
5680 +  comm - MPI communicator
5681 .  m - number of local rows (Cannot be PETSC_DECIDE)
5682 .  n - This value should be the same as the local size used in creating the
5683        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5684        calculated if N is given) For square matrices n is almost always m.
5685 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5686 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5687 .   i - row indices for "diagonal" portion of matrix
5688 .   j - column indices
5689 .   a - matrix values
5690 .   oi - row indices for "off-diagonal" portion of matrix
5691 .   oj - column indices
5692 -   oa - matrix values
5693 
5694    Output Parameter:
5695 .   mat - the matrix
5696 
5697    Level: advanced
5698 
5699    Notes:
5700        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5701        must free the arrays once the matrix has been destroyed and not before.
5702 
5703        The i and j indices are 0 based
5704 
5705        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5706 
5707        This sets local rows and cannot be used to set off-processor values.
5708 
5709        You cannot later use MatSetValues() to change values in this matrix.
5710 
5711 .keywords: matrix, aij, compressed row, sparse, parallel
5712 
5713 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5714           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5715 @*/
5716 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5717 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5718 {
5719   PetscErrorCode ierr;
5720   Mat_MPIAIJ     *maij;
5721 
5722  PetscFunctionBegin;
5723   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5724   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5725   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5726   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5727   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5728   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5729   maij = (Mat_MPIAIJ*) (*mat)->data;
5730   maij->donotstash     = PETSC_TRUE;
5731   (*mat)->preallocated = PETSC_TRUE;
5732 
5733   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5734   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5735 
5736   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5737   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5738 
5739   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5740   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5741   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5742   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5743 
5744   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5745   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5746   PetscFunctionReturn(0);
5747 }
5748 
5749 /*
5750     Special version for direct calls from Fortran
5751 */
5752 #include <petsc-private/fortranimpl.h>
5753 
5754 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5755 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5756 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5757 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5758 #endif
5759 
5760 /* Change these macros so can be used in void function */
5761 #undef CHKERRQ
5762 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5763 #undef SETERRQ2
5764 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5765 #undef SETERRQ3
5766 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5767 #undef SETERRQ
5768 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5769 
5770 EXTERN_C_BEGIN
5771 #undef __FUNCT__
5772 #define __FUNCT__ "matsetvaluesmpiaij_"
5773 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5774 {
5775   Mat             mat = *mmat;
5776   PetscInt        m = *mm, n = *mn;
5777   InsertMode      addv = *maddv;
5778   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5779   PetscScalar     value;
5780   PetscErrorCode  ierr;
5781 
5782   MatCheckPreallocated(mat,1);
5783   if (mat->insertmode == NOT_SET_VALUES) {
5784     mat->insertmode = addv;
5785   }
5786 #if defined(PETSC_USE_DEBUG)
5787   else if (mat->insertmode != addv) {
5788     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5789   }
5790 #endif
5791   {
5792   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5793   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5794   PetscBool       roworiented = aij->roworiented;
5795 
5796   /* Some Variables required in the macro */
5797   Mat             A = aij->A;
5798   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5799   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5800   MatScalar       *aa = a->a;
5801   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5802   Mat             B = aij->B;
5803   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5804   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5805   MatScalar       *ba = b->a;
5806 
5807   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5808   PetscInt        nonew = a->nonew;
5809   MatScalar       *ap1,*ap2;
5810 
5811   PetscFunctionBegin;
5812   for (i=0; i<m; i++) {
5813     if (im[i] < 0) continue;
5814 #if defined(PETSC_USE_DEBUG)
5815     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5816 #endif
5817     if (im[i] >= rstart && im[i] < rend) {
5818       row      = im[i] - rstart;
5819       lastcol1 = -1;
5820       rp1      = aj + ai[row];
5821       ap1      = aa + ai[row];
5822       rmax1    = aimax[row];
5823       nrow1    = ailen[row];
5824       low1     = 0;
5825       high1    = nrow1;
5826       lastcol2 = -1;
5827       rp2      = bj + bi[row];
5828       ap2      = ba + bi[row];
5829       rmax2    = bimax[row];
5830       nrow2    = bilen[row];
5831       low2     = 0;
5832       high2    = nrow2;
5833 
5834       for (j=0; j<n; j++) {
5835         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5836         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5837         if (in[j] >= cstart && in[j] < cend){
5838           col = in[j] - cstart;
5839           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5840         } else if (in[j] < 0) continue;
5841 #if defined(PETSC_USE_DEBUG)
5842         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5843 #endif
5844         else {
5845           if (mat->was_assembled) {
5846             if (!aij->colmap) {
5847               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5848             }
5849 #if defined (PETSC_USE_CTABLE)
5850             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5851 	    col--;
5852 #else
5853             col = aij->colmap[in[j]] - 1;
5854 #endif
5855             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5856               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5857               col =  in[j];
5858               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5859               B = aij->B;
5860               b = (Mat_SeqAIJ*)B->data;
5861               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5862               rp2      = bj + bi[row];
5863               ap2      = ba + bi[row];
5864               rmax2    = bimax[row];
5865               nrow2    = bilen[row];
5866               low2     = 0;
5867               high2    = nrow2;
5868               bm       = aij->B->rmap->n;
5869               ba = b->a;
5870             }
5871           } else col = in[j];
5872           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5873         }
5874       }
5875     } else {
5876       if (!aij->donotstash) {
5877         if (roworiented) {
5878           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5879         } else {
5880           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5881         }
5882       }
5883     }
5884   }}
5885   PetscFunctionReturnVoid();
5886 }
5887 EXTERN_C_END
5888 
5889