xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision efcf75d53ad3cd882f5e025d03e21ee8d19e6773)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL. The AIJ type also automatically switches over to using inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
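
/*
   A minimal usage sketch of the pattern recommended above (an illustration, not
   code from this file; comm, m, n, d_nz, and o_nz are placeholder values and
   error checking is abbreviated).  MatSetFromOptions() lets -mat_type aij select
   this type, and calling both preallocation routines lets the same code run on
   one process or on many:

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,d_nz,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
      ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/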
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 
178     Only for square matrices
179 */
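/*
   A minimal usage sketch (an illustration, not code from this file): gmat is a
   fully assembled MATSEQAIJ matrix on process 0 (it is only dereferenced there),
   m is the number of rows wanted on this process, and error checking is omitted.
   With MAT_REUSE_MATRIX only the numerical values are moved over again.

      Mat dist;
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
      ...
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/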
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     if (!rank) {
204       bses[0] = gmat->rmap->bs;
205       bses[1] = gmat->cmap->bs;
206     }
207     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
208     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
209     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
210     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
211     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
212     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) {
215       rowners[i] += rowners[i-1];
216     }
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine the number of diagonal and off-diagonal nonzeros */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
231       jj = 0;
232       for (i=0; i<m; i++) {
233 	for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236 	  jj++;
237 	}
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the number of diagonal and off-diagonal nonzeros */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj = 0;
266       for (i=0; i<m; i++) {
267 	for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270 	  jj++;
271 	}
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt  = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299     *inmat = mat;
300   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
301     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
302     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
303     mat   = *inmat;
304     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
305     if (!rank) {
306       /* send numerical values to other processes */
307       gmata = (Mat_SeqAIJ*) gmat->data;
308       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
309       gmataa = gmata->a;
310       for (i=1; i<size; i++) {
311         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
312         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
313       }
314       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
315     } else {
316       /* receive numerical values from process 0*/
317       nz   = Ad->nz + Ao->nz;
318       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
319       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
320     }
321     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
322     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
323     ad = Ad->a;
324     ao = Ao->a;
325     if (mat->rmap->n) {
326       i  = 0;
327       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
328       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
329     }
330     for (i=1; i<mat->rmap->n; i++) {
331       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     i--;
335     if (mat->rmap->n) {
336       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
337     }
338     if (rank) {
339       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
340     }
341   }
342   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   CHKMEMQ;
345   PetscFunctionReturn(0);
346 }
347 
348 /*
349   Local utility routine that creates a mapping from the global column
350 number to the local number in the off-diagonal part of the local
351 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
352 a slightly higher hash table cost; without it it is not scalable (each processor
353 has an order N integer array) but is fast to access.
354 */
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined (PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++){
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
373   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
374   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
375 #endif
376   PetscFunctionReturn(0);
377 }
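
/*
   A sketch of how the colmap built above is consulted later (it mirrors the
   lookup done in MatSetValues_MPIAIJ() below; gcol is a placeholder for a
   global column index and col becomes the local column in the off-diagonal
   part, or -1 if gcol is not present):

#if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
      col--;
#else
      col = aij->colmap[gcol] - 1;
#endif
*/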
378 
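/*
   The two macros below insert a single value into the local diagonal (A) or
   off-diagonal (B) SeqAIJ block of a row: a short binary search narrows the
   sorted column list, an existing entry is added to or overwritten, and
   otherwise space is made (reallocating with MatSeqXAIJReallocateAIJ() if
   necessary) and the later entries of the row are shifted up by one slot.
*/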
379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
380 { \
381     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
382     lastcol1 = col;\
383     while (high1-low1 > 5) { \
384       t = (low1+high1)/2; \
385       if (rp1[t] > col) high1 = t; \
386       else             low1  = t; \
387     } \
388       for (_i=low1; _i<high1; _i++) { \
389         if (rp1[_i] > col) break; \
390         if (rp1[_i] == col) { \
391           if (addv == ADD_VALUES) ap1[_i] += value;   \
392           else                    ap1[_i] = value; \
393           goto a_noinsert; \
394         } \
395       }  \
396       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
397       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
399       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
400       N = nrow1++ - 1; a->nz++; high1++; \
401       /* shift up all the later entries in this row */ \
402       for (ii=N; ii>=_i; ii--) { \
403         rp1[ii+1] = rp1[ii]; \
404         ap1[ii+1] = ap1[ii]; \
405       } \
406       rp1[_i] = col;  \
407       ap1[_i] = value;  \
408       a_noinsert: ; \
409       ailen[row] = nrow1; \
410 }
411 
412 
413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
414 { \
415     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
416     lastcol2 = col;\
417     while (high2-low2 > 5) { \
418       t = (low2+high2)/2; \
419       if (rp2[t] > col) high2 = t; \
420       else             low2  = t; \
421     } \
422     for (_i=low2; _i<high2; _i++) {		\
423       if (rp2[_i] > col) break;			\
424       if (rp2[_i] == col) {			      \
425 	if (addv == ADD_VALUES) ap2[_i] += value;     \
426 	else                    ap2[_i] = value;      \
427 	goto b_noinsert;			      \
428       }						      \
429     }							      \
430     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
431     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
432     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
433     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
434     N = nrow2++ - 1; b->nz++; high2++;					\
435     /* shift up all the later entries in this row */			\
436     for (ii=N; ii>=_i; ii--) {						\
437       rp2[ii+1] = rp2[ii];						\
438       ap2[ii+1] = ap2[ii];						\
439     }									\
440     rp2[_i] = col;							\
441     ap2[_i] = value;							\
442     b_noinsert: ;								\
443     bilen[row] = nrow2;							\
444 }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
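
/*
   Illustration of the ordering MatSetValuesRow_MPIAIJ() above expects in v (a
   sketch of the layout, not code from this file): for a local row, v must hold
   that row's nonzeros sorted by global column, i.e. first the off-diagonal
   entries with columns to the left of the diagonal block, then the diagonal
   block entries, then the off-diagonal entries to the right, which matches the
   three PetscMemcpy() calls above.
*/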
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat            A = aij->A;
487   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
488   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar      *aa = a->a;
490   PetscBool      ignorezeroentries = a->ignorezeroentries;
491   Mat            B = aij->B;
492   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
493   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar      *ba = b->a;
495 
496   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt       nonew;
498   MatScalar      *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   if (v) PetscValidScalarPointer(v,6);
502   for (i=0; i<m; i++) {
503     if (im[i] < 0) continue;
504 #if defined(PETSC_USE_DEBUG)
505     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
506 #endif
507     if (im[i] >= rstart && im[i] < rend) {
508       row      = im[i] - rstart;
509       lastcol1 = -1;
510       rp1      = aj + ai[row];
511       ap1      = aa + ai[row];
512       rmax1    = aimax[row];
513       nrow1    = ailen[row];
514       low1     = 0;
515       high1    = nrow1;
516       lastcol2 = -1;
517       rp2      = bj + bi[row];
518       ap2      = ba + bi[row];
519       rmax2    = bimax[row];
520       nrow2    = bilen[row];
521       low2     = 0;
522       high2    = nrow2;
523 
524       for (j=0; j<n; j++) {
525         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend){
528           col = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined (PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542 	    col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B = aij->B;
551               b = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2      = bj + bi[row];
554               ap2      = ba + bi[row];
555               rmax2    = bimax[row];
556               nrow2    = bilen[row];
557               low2     = 0;
558               high2    = nrow2;
559               bm       = aij->B->rmap->n;
560               ba = b->a;
561             }
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend){
601           col = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined (PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col --;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else {
620       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
621     }
622   }
623   PetscFunctionReturn(0);
624 }
625 
626 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
630 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
631 {
632   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
633   PetscErrorCode ierr;
634   PetscInt       nstash,reallocs;
635   InsertMode     addv;
636 
637   PetscFunctionBegin;
638   if (aij->donotstash || mat->nooffprocentries) {
639     PetscFunctionReturn(0);
640   }
641 
642   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
643   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
644   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
645   mat->insertmode = addv; /* in case this processor had no cache */
646 
647   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
648   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
649   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
656 {
657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
658   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
659   PetscErrorCode ierr;
660   PetscMPIInt    n;
661   PetscInt       i,j,rstart,ncols,flg;
662   PetscInt       *row,*col;
663   PetscBool      other_disassembled;
664   PetscScalar    *val;
665   InsertMode     addv = mat->insertmode;
666 
667   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
668   PetscFunctionBegin;
669   if (!aij->donotstash && !mat->nooffprocentries) {
670     while (1) {
671       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
672       if (!flg) break;
673 
674       for (i=0; i<n;) {
675         /* Now identify the consecutive vals belonging to the same row */
676         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
677         if (j < n) ncols = j-i;
678         else       ncols = n-i;
679         /* Now assemble all these values with a single function call */
680         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled; if so we must
690      also disassemble ourselves, in order that we may reassemble. */
691   /*
692      if the nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled, thus we can skip this step
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
706   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
707   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
708 
709   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNCT__
722 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
723 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
724 {
725   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
726   PetscErrorCode ierr;
727 
728   PetscFunctionBegin;
729   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
730   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
731   PetscFunctionReturn(0);
732 }
733 
734 #undef __FUNCT__
735 #define __FUNCT__ "MatZeroRows_MPIAIJ"
736 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
737 {
738   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
739   PetscErrorCode    ierr;
740   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
741   PetscInt          i,*owners = A->rmap->range;
742   PetscInt          *nprocs,j,idx,nsends,row;
743   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
744   PetscInt          *rvalues,count,base,slen,*source;
745   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
746   MPI_Comm          comm = ((PetscObject)A)->comm;
747   MPI_Request       *send_waits,*recv_waits;
748   MPI_Status        recv_status,*send_status;
749   const PetscScalar *xx;
750   PetscScalar       *bb;
751 #if defined(PETSC_DEBUG)
752   PetscBool      found = PETSC_FALSE;
753 #endif
754 
755   PetscFunctionBegin;
756   /*  first count number of contributors to each processor */
757   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
758   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
759   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
760   j = 0;
761   for (i=0; i<N; i++) {
762     if (lastidx > (idx = rows[i])) j = 0;
763     lastidx = idx;
764     for (; j<size; j++) {
765       if (idx >= owners[j] && idx < owners[j+1]) {
766         nprocs[2*j]++;
767         nprocs[2*j+1] = 1;
768         owner[i] = j;
769 #if defined(PETSC_DEBUG)
770         found = PETSC_TRUE;
771 #endif
772         break;
773       }
774     }
775 #if defined(PETSC_DEBUG)
776     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
777     found = PETSC_FALSE;
778 #endif
779   }
780   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
781 
782   if (A->nooffproczerorows) {
783     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
784     nrecvs = nsends;
785     nmax   = N;
786   } else {
787     /* inform other processors of number of messages and max length*/
788     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
789   }
790 
791   /* post receives:   */
792   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
793   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
794   for (i=0; i<nrecvs; i++) {
795     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
796   }
797 
798   /* do sends:
799       1) starts[i] gives the starting index in svalues for stuff going to
800          the ith processor
801   */
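  /* for example, with size = 3 and per-process counts nprocs[0],nprocs[2],nprocs[4] = 2,0,3
     (an illustrative assumption), starts becomes {0,2,2} and the rows destined for
     process 2 are packed into svalues[2..4] */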
802   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
803   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
804   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
805   starts[0] = 0;
806   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
807   for (i=0; i<N; i++) {
808     svalues[starts[owner[i]]++] = rows[i];
809   }
810 
811   starts[0] = 0;
812   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
813   count = 0;
814   for (i=0; i<size; i++) {
815     if (nprocs[2*i+1]) {
816       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
817     }
818   }
819   ierr = PetscFree(starts);CHKERRQ(ierr);
820 
821   base = owners[rank];
822 
823   /*  wait on receives */
824   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
825   count  = nrecvs; slen = 0;
826   while (count) {
827     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
828     /* unpack receives into our local space */
829     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
830     source[imdex]  = recv_status.MPI_SOURCE;
831     lens[imdex]    = n;
832     slen          += n;
833     count--;
834   }
835   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
836 
837   /* move the data into the send scatter */
838   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
839   count = 0;
840   for (i=0; i<nrecvs; i++) {
841     values = rvalues + i*nmax;
842     for (j=0; j<lens[i]; j++) {
843       lrows[count++] = values[j] - base;
844     }
845   }
846   ierr = PetscFree(rvalues);CHKERRQ(ierr);
847   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
848   ierr = PetscFree(owner);CHKERRQ(ierr);
849   ierr = PetscFree(nprocs);CHKERRQ(ierr);
850 
851   /* fix right hand side if needed */
852   if (x && b) {
853     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
854     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
855     for (i=0; i<slen; i++) {
856       bb[lrows[i]] = diag*xx[lrows[i]];
857     }
858     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
859     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
860   }
861   /*
862         Zero the required rows. If the "diagonal block" of the matrix
863      is square and the user wishes to set the diagonal we use separate
864      code so that MatSetValues() is not called for each diagonal entry, which would
865      allocate new memory, causing lots of mallocs and slowing things down.
866 
867   */
868   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
869   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
870   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
871     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
872   } else if (diag != 0.0) {
873     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
874     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
875       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
876 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
877     }
878     for (i = 0; i < slen; i++) {
879       row  = lrows[i] + rstart;
880       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
881     }
882     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
883     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
884   } else {
885     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
886   }
887   ierr = PetscFree(lrows);CHKERRQ(ierr);
888 
889   /* wait on sends */
890   if (nsends) {
891     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
892     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
893     ierr = PetscFree(send_status);CHKERRQ(ierr);
894   }
895   ierr = PetscFree(send_waits);CHKERRQ(ierr);
896   ierr = PetscFree(svalues);CHKERRQ(ierr);
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
902 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
903 {
904   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
905   PetscErrorCode    ierr;
906   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
907   PetscInt          i,*owners = A->rmap->range;
908   PetscInt          *nprocs,j,idx,nsends;
909   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
910   PetscInt          *rvalues,count,base,slen,*source;
911   PetscInt          *lens,*lrows,*values,m;
912   MPI_Comm          comm = ((PetscObject)A)->comm;
913   MPI_Request       *send_waits,*recv_waits;
914   MPI_Status        recv_status,*send_status;
915   const PetscScalar *xx;
916   PetscScalar       *bb,*mask;
917   Vec               xmask,lmask;
918   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
919   const PetscInt    *aj, *ii,*ridx;
920   PetscScalar       *aa;
921 #if defined(PETSC_DEBUG)
922   PetscBool         found = PETSC_FALSE;
923 #endif
924 
925   PetscFunctionBegin;
926   /*  first count number of contributors to each processor */
927   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
928   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
929   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
930   j = 0;
931   for (i=0; i<N; i++) {
932     if (lastidx > (idx = rows[i])) j = 0;
933     lastidx = idx;
934     for (; j<size; j++) {
935       if (idx >= owners[j] && idx < owners[j+1]) {
936         nprocs[2*j]++;
937         nprocs[2*j+1] = 1;
938         owner[i] = j;
939 #if defined(PETSC_DEBUG)
940         found = PETSC_TRUE;
941 #endif
942         break;
943       }
944     }
945 #if defined(PETSC_DEBUG)
946     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
947     found = PETSC_FALSE;
948 #endif
949   }
950   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
951 
952   /* inform other processors of number of messages and max length*/
953   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
954 
955   /* post receives:   */
956   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
957   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
958   for (i=0; i<nrecvs; i++) {
959     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
960   }
961 
962   /* do sends:
963       1) starts[i] gives the starting index in svalues for stuff going to
964          the ith processor
965   */
966   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
967   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
968   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
969   starts[0] = 0;
970   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
971   for (i=0; i<N; i++) {
972     svalues[starts[owner[i]]++] = rows[i];
973   }
974 
975   starts[0] = 0;
976   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
977   count = 0;
978   for (i=0; i<size; i++) {
979     if (nprocs[2*i+1]) {
980       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
981     }
982   }
983   ierr = PetscFree(starts);CHKERRQ(ierr);
984 
985   base = owners[rank];
986 
987   /*  wait on receives */
988   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
989   count  = nrecvs; slen = 0;
990   while (count) {
991     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
992     /* unpack receives into our local space */
993     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
994     source[imdex]  = recv_status.MPI_SOURCE;
995     lens[imdex]    = n;
996     slen          += n;
997     count--;
998   }
999   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1000 
1001   /* move the data into the send scatter */
1002   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1003   count = 0;
1004   for (i=0; i<nrecvs; i++) {
1005     values = rvalues + i*nmax;
1006     for (j=0; j<lens[i]; j++) {
1007       lrows[count++] = values[j] - base;
1008     }
1009   }
1010   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1011   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1012   ierr = PetscFree(owner);CHKERRQ(ierr);
1013   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1014   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1015 
1016   /* zero diagonal part of matrix */
1017   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1018 
1019   /* handle off diagonal part of matrix */
1020   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1021   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1022   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1023   for (i=0; i<slen; i++) {
1024     bb[lrows[i]] = 1;
1025   }
1026   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1027   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1030   if (x) {
1031     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1034     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1035   }
1036   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1037 
1038   /* remove zeroed rows of off diagonal matrix */
1039   ii = aij->i;
1040   for (i=0; i<slen; i++) {
1041     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1042   }
1043 
1044   /* loop over all elements of off process part of matrix zeroing removed columns*/
1045   if (aij->compressedrow.use){
1046     m    = aij->compressedrow.nrows;
1047     ii   = aij->compressedrow.i;
1048     ridx = aij->compressedrow.rindex;
1049     for (i=0; i<m; i++){
1050       n   = ii[i+1] - ii[i];
1051       aj  = aij->j + ii[i];
1052       aa  = aij->a + ii[i];
1053 
1054       for (j=0; j<n; j++) {
1055         if (PetscAbsScalar(mask[*aj])) {
1056           if (b) bb[*ridx] -= *aa*xx[*aj];
1057           *aa        = 0.0;
1058         }
1059         aa++;
1060         aj++;
1061       }
1062       ridx++;
1063     }
1064   } else { /* do not use compressed row format */
1065     m = l->B->rmap->n;
1066     for (i=0; i<m; i++) {
1067       n   = ii[i+1] - ii[i];
1068       aj  = aij->j + ii[i];
1069       aa  = aij->a + ii[i];
1070       for (j=0; j<n; j++) {
1071         if (PetscAbsScalar(mask[*aj])) {
1072           if (b) bb[i] -= *aa*xx[*aj];
1073           *aa    = 0.0;
1074         }
1075         aa++;
1076         aj++;
1077       }
1078     }
1079   }
1080   if (x) {
1081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1082     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1083   }
1084   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1085   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1086   ierr = PetscFree(lrows);CHKERRQ(ierr);
1087 
1088   /* wait on sends */
1089   if (nsends) {
1090     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1091     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1092     ierr = PetscFree(send_status);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1095   ierr = PetscFree(svalues);CHKERRQ(ierr);
1096 
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added into yy until the VecScatterEnd() on the next line */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values were actually received in the Begin() but we still need to call this no-op */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 EXTERN_C_BEGIN
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1179 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1180 {
1181   MPI_Comm       comm;
1182   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1183   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1184   IS             Me,Notme;
1185   PetscErrorCode ierr;
1186   PetscInt       M,N,first,last,*notme,i;
1187   PetscMPIInt    size;
1188 
1189   PetscFunctionBegin;
1190 
1191   /* Easy test: the diagonal blocks must be transposes of each other */
1192   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1193   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1194   if (!*f) PetscFunctionReturn(0);
1195   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1196   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1197   if (size == 1) PetscFunctionReturn(0);
1198 
1199   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1200   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1201   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1202   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1203   for (i=0; i<first; i++) notme[i] = i;
1204   for (i=last; i<M; i++) notme[i-last+first] = i;
1205   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1206   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1207   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1208   Aoff = Aoffs[0];
1209   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1210   Boff = Boffs[0];
1211   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1212   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1213   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1214   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1215   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1216   ierr = PetscFree(notme);CHKERRQ(ierr);
1217   PetscFunctionReturn(0);
1218 }
1219 EXTERN_C_END
1220 
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1223 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1224 {
1225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1226   PetscErrorCode ierr;
1227 
1228   PetscFunctionBegin;
1229   /* do nondiagonal part */
1230   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1231   /* send it on its way */
1232   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1233   /* do local part */
1234   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1235   /* receive remote parts */
1236   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 /*
1241   This only works correctly for square matrices where the subblock A->A is the
1242    diagonal block
1243 */
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1246 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1247 {
1248   PetscErrorCode ierr;
1249   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1250 
1251   PetscFunctionBegin;
1252   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1253   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1254   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatScale_MPIAIJ"
1260 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1261 {
1262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1267   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatDestroy_MPIAIJ"
1273 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1274 {
1275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1276   PetscErrorCode ierr;
1277 
1278   PetscFunctionBegin;
1279 #if defined(PETSC_USE_LOG)
1280   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1281 #endif
1282   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1283   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1284   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1285   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1286 #if defined (PETSC_USE_CTABLE)
1287   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1288 #else
1289   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1290 #endif
1291   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1292   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1293   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1294   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1295   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1296   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1312 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1313 {
1314   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1315   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1316   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1317   PetscErrorCode    ierr;
1318   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1319   int               fd;
1320   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1321   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1322   PetscScalar       *column_values;
1323   PetscInt          message_count,flowcontrolcount;
1324 
1325   PetscFunctionBegin;
1326   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1327   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1328   nz   = A->nz + B->nz;
1329   if (!rank) {
1330     header[0] = MAT_FILE_CLASSID;
1331     header[1] = mat->rmap->N;
1332     header[2] = mat->cmap->N;
1333     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1334     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1335     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1336     /* get largest number of rows any processor has */
1337     rlen = mat->rmap->n;
1338     range = mat->rmap->range;
1339     for (i=1; i<size; i++) {
1340       rlen = PetscMax(rlen,range[i+1] - range[i]);
1341     }
1342   } else {
1343     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1344     rlen = mat->rmap->n;
1345   }
1346 
1347   /* load up the local row counts */
1348   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1349   for (i=0; i<mat->rmap->n; i++) {
1350     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1351   }
1352 
1353   /* store the row lengths to the file */
1354   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1355   if (!rank) {
1356     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1357     for (i=1; i<size; i++) {
1358       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1359       rlen = range[i+1] - range[i];
1360       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1361       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1362     }
1363     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1364   } else {
1365     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1366     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1367     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1368   }
1369   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1370 
1371   /* load up the local column indices */
1372   nzmax = nz; /* process 0 needs as much space as the largest process needs */
1373   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1374   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1375   cnt  = 0;
1376   for (i=0; i<mat->rmap->n; i++) {
1377     for (j=B->i[i]; j<B->i[i+1]; j++) {
1378       if ( (col = garray[B->j[j]]) > cstart) break;
1379       column_indices[cnt++] = col;
1380     }
1381     for (k=A->i[i]; k<A->i[i+1]; k++) {
1382       column_indices[cnt++] = A->j[k] + cstart;
1383     }
1384     for (; j<B->i[i+1]; j++) {
1385       column_indices[cnt++] = garray[B->j[j]];
1386     }
1387   }
1388   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1389 
1390   /* store the column indices to the file */
1391   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1392   if (!rank) {
1393     MPI_Status status;
1394     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1395     for (i=1; i<size; i++) {
1396       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1397       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1398       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1399       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1400       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1401     }
1402     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1403   } else {
1404     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1405     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1406     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1407     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1408   }
1409   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1410 
1411   /* load up the local column values */
1412   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1413   cnt  = 0;
1414   for (i=0; i<mat->rmap->n; i++) {
1415     for (j=B->i[i]; j<B->i[i+1]; j++) {
1416       if ( garray[B->j[j]] > cstart) break;
1417       column_values[cnt++] = B->a[j];
1418     }
1419     for (k=A->i[i]; k<A->i[i+1]; k++) {
1420       column_values[cnt++] = A->a[k];
1421     }
1422     for (; j<B->i[i+1]; j++) {
1423       column_values[cnt++] = B->a[j];
1424     }
1425   }
1426   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1427 
1428   /* store the column values to the file */
1429   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1430   if (!rank) {
1431     MPI_Status status;
1432     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1433     for (i=1; i<size; i++) {
1434       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1435       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1436       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1437       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1438       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1439     }
1440     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1441   } else {
1442     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1443     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1444     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1445     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1446   }
1447   ierr = PetscFree(column_values);CHKERRQ(ierr);
1448   PetscFunctionReturn(0);
1449 }
1450 
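/*
   MatView_MPIAIJ_ASCIIorDraworSocket - Handles ASCII, draw, and socket viewers, and
   dispatches binary viewers to MatView_MPIAIJ_Binary() (or views the local block
   directly on one process).  The ASCII info formats print only summary statistics;
   any other format assembles the entire matrix onto process 0 as a temporary
   MATMPIAIJ (subject to the 1024-row limit, overridable with -mat_ascii_output_large)
   and views it through the singleton viewer.
*/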
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1453 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1454 {
1455   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1456   PetscErrorCode    ierr;
1457   PetscMPIInt       rank = aij->rank,size = aij->size;
1458   PetscBool         isdraw,iascii,isbinary;
1459   PetscViewer       sviewer;
1460   PetscViewerFormat format;
1461 
1462   PetscFunctionBegin;
1463   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1464   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1465   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1466   if (iascii) {
1467     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1468     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1469       MatInfo    info;
1470       PetscBool  inodes;
1471 
1472       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1473       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1474       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1475       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1476       if (!inodes) {
1477         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1478 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1479       } else {
1480         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1481 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1482       }
1483       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1484       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1485       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1486       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1487       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1488       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1489       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1490       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1491       PetscFunctionReturn(0);
1492     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1493       PetscInt   inodecount,inodelimit,*inodes;
1494       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1495       if (inodes) {
1496         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1497       } else {
1498         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1499       }
1500       PetscFunctionReturn(0);
1501     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1502       PetscFunctionReturn(0);
1503     }
1504   } else if (isbinary) {
1505     if (size == 1) {
1506       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1507       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1508     } else {
1509       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1510     }
1511     PetscFunctionReturn(0);
1512   } else if (isdraw) {
1513     PetscDraw  draw;
1514     PetscBool  isnull;
1515     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1516     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1517   }
1518 
1519   if (size == 1) {
1520     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1521     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1522   } else {
1523     /* assemble the entire matrix onto first processor. */
1524     Mat         A;
1525     Mat_SeqAIJ  *Aloc;
1526     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1527     MatScalar   *a;
1528 
1529     if (mat->rmap->N > 1024) {
1530       PetscBool  flg = PETSC_FALSE;
1531 
1532       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1533       if (!flg) {
1534         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1535       }
1536     }
1537 
1538     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1539     if (!rank) {
1540       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1541     } else {
1542       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1543     }
1544     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1545     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1546     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1547     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1548     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1549 
1550     /* copy over the A part */
1551     Aloc = (Mat_SeqAIJ*)aij->A->data;
1552     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1553     row = mat->rmap->rstart;
1554     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1555     for (i=0; i<m; i++) {
1556       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1557       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1558     }
1559     aj = Aloc->j;
1560     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1561 
1562     /* copy over the B part */
1563     Aloc = (Mat_SeqAIJ*)aij->B->data;
1564     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1565     row  = mat->rmap->rstart;
1566     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1567     ct   = cols;
1568     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1569     for (i=0; i<m; i++) {
1570       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1571       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1572     }
1573     ierr = PetscFree(ct);CHKERRQ(ierr);
1574     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1575     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1576     /*
1577        Everyone has to call to draw the matrix since the graphics waits are
1578        synchronized across all processors that share the PetscDraw object
1579     */
1580     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1581     if (!rank) {
1582       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1583       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1584       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1585       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1586     }
1587     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1588     ierr = MatDestroy(&A);CHKERRQ(ierr);
1589   }
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 #undef __FUNCT__
1594 #define __FUNCT__ "MatView_MPIAIJ"
1595 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1596 {
1597   PetscErrorCode ierr;
1598   PetscBool      iascii,isdraw,issocket,isbinary;
1599 
1600   PetscFunctionBegin;
1601   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1602   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1603   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1604   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1605   if (iascii || isdraw || isbinary || issocket) {
1606     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1607   } else {
1608     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1609   }
1610   PetscFunctionReturn(0);
1611 }
1612 
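/*
   MatSOR_MPIAIJ - Applies a processor-local SOR (block Jacobi with an SOR sweep on
   each diagonal block).  Each outer iteration scatters the current solution into
   lvec, forms the locally corrected right-hand side bb1 = bb - B*x (B holds the
   off-processor coupling), and runs the requested sequential sweep on the diagonal
   block A.  Truly coupled parallel SOR is not supported.

   This routine is normally reached through the SOR preconditioner; a sketch, assuming
   an already configured KSP:

     PC pc;
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
*/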
1613 #undef __FUNCT__
1614 #define __FUNCT__ "MatSOR_MPIAIJ"
1615 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1616 {
1617   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1618   PetscErrorCode ierr;
1619   Vec            bb1 = 0;
1620   PetscBool      hasop;
1621 
1622   PetscFunctionBegin;
1623   if (flag == SOR_APPLY_UPPER) {
1624     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1625     PetscFunctionReturn(0);
1626   }
1627 
1628   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1629     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1630   }
1631 
1632   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1633     if (flag & SOR_ZERO_INITIAL_GUESS) {
1634       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1635       its--;
1636     }
1637 
1638     while (its--) {
1639       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1640       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1641 
1642       /* update rhs: bb1 = bb - B*x */
1643       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1644       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1645 
1646       /* local sweep */
1647       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1648     }
1649   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1650     if (flag & SOR_ZERO_INITIAL_GUESS) {
1651       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1652       its--;
1653     }
1654     while (its--) {
1655       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1656       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1657 
1658       /* update rhs: bb1 = bb - B*x */
1659       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1660       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1661 
1662       /* local sweep */
1663       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1664     }
1665   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1666     if (flag & SOR_ZERO_INITIAL_GUESS) {
1667       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1668       its--;
1669     }
1670     while (its--) {
1671       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1672       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1673 
1674       /* update rhs: bb1 = bb - B*x */
1675       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1676       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1677 
1678       /* local sweep */
1679       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1680     }
1681   }  else if (flag & SOR_EISENSTAT) {
1682     Vec         xx1;
1683 
1684     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1685     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1686 
1687     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1688     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1689     if (!mat->diag) {
1690       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1691       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1692     }
1693     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1694     if (hasop) {
1695       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1696     } else {
1697       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1698     }
1699     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1700 
1701     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1702 
1703     /* local sweep */
1704     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1705     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1706     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1707   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1708 
1709   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1710   PetscFunctionReturn(0);
1711 }
1712 
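/*
   MatPermute_MPIAIJ - Permutes the rows and columns of A.  The row permutation is
   made collective (so serial index sets are accepted), gathered onto every process,
   and inverted; the locally owned slice of the inverse gives the target row indices.
   The column permutation is inverted locally, and the permuted matrix is then
   extracted with MatGetSubMatrix_MPIAIJ_Private().
*/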
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatPermute_MPIAIJ"
1715 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1716 {
1717   MPI_Comm       comm;
1718   PetscInt       first,local_rowsize,local_colsize;
1719   const PetscInt *rows;
1720   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1721   PetscErrorCode ierr;
1722 
1723   PetscFunctionBegin;
1724   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1725   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1726   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1727   /* collect the global row permutation and invert it */
1728   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1729   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1730   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1731   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1732   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1733   /* get the local target indices */
1734   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1735   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1736   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1737   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1738   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1739   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1740   /* the column permutation is simpler:
1741      make a local version of 'colp' and invert it */
1742   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1743   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1744   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1745   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1746   /* now we just get the submatrix */
1747   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1748   /* clean up */
1749   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1750   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1756 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1757 {
1758   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1759   Mat            A = mat->A,B = mat->B;
1760   PetscErrorCode ierr;
1761   PetscReal      isend[5],irecv[5];
1762 
1763   PetscFunctionBegin;
1764   info->block_size     = 1.0;
1765   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1766   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1767   isend[3] = info->memory;  isend[4] = info->mallocs;
1768   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1769   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1770   isend[3] += info->memory;  isend[4] += info->mallocs;
1771   if (flag == MAT_LOCAL) {
1772     info->nz_used      = isend[0];
1773     info->nz_allocated = isend[1];
1774     info->nz_unneeded  = isend[2];
1775     info->memory       = isend[3];
1776     info->mallocs      = isend[4];
1777   } else if (flag == MAT_GLOBAL_MAX) {
1778     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1779     info->nz_used      = irecv[0];
1780     info->nz_allocated = irecv[1];
1781     info->nz_unneeded  = irecv[2];
1782     info->memory       = irecv[3];
1783     info->mallocs      = irecv[4];
1784   } else if (flag == MAT_GLOBAL_SUM) {
1785     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1786     info->nz_used      = irecv[0];
1787     info->nz_allocated = irecv[1];
1788     info->nz_unneeded  = irecv[2];
1789     info->memory       = irecv[3];
1790     info->mallocs      = irecv[4];
1791   }
1792   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1793   info->fill_ratio_needed = 0;
1794   info->factor_mallocs    = 0;
1795 
1796   PetscFunctionReturn(0);
1797 }
1798 
1799 #undef __FUNCT__
1800 #define __FUNCT__ "MatSetOption_MPIAIJ"
1801 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1802 {
1803   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1804   PetscErrorCode ierr;
1805 
1806   PetscFunctionBegin;
1807   switch (op) {
1808   case MAT_NEW_NONZERO_LOCATIONS:
1809   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1810   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1811   case MAT_KEEP_NONZERO_PATTERN:
1812   case MAT_NEW_NONZERO_LOCATION_ERR:
1813   case MAT_USE_INODES:
1814   case MAT_IGNORE_ZERO_ENTRIES:
1815     MatCheckPreallocated(A,1);
1816     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1817     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1818     break;
1819   case MAT_ROW_ORIENTED:
1820     a->roworiented = flg;
1821     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1822     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1823     break;
1824   case MAT_NEW_DIAGONALS:
1825     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1826     break;
1827   case MAT_IGNORE_OFF_PROC_ENTRIES:
1828     a->donotstash = flg;
1829     break;
1830   case MAT_SPD:
1831     A->spd_set                         = PETSC_TRUE;
1832     A->spd                             = flg;
1833     if (flg) {
1834       A->symmetric                     = PETSC_TRUE;
1835       A->structurally_symmetric        = PETSC_TRUE;
1836       A->symmetric_set                 = PETSC_TRUE;
1837       A->structurally_symmetric_set    = PETSC_TRUE;
1838     }
1839     break;
1840   case MAT_SYMMETRIC:
1841     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1842     break;
1843   case MAT_STRUCTURALLY_SYMMETRIC:
1844     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1845     break;
1846   case MAT_HERMITIAN:
1847     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1848     break;
1849   case MAT_SYMMETRY_ETERNAL:
1850     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1851     break;
1852   default:
1853     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1854   }
1855   PetscFunctionReturn(0);
1856 }
1857 
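/*
   MatGetRow_MPIAIJ - Returns one locally owned row in global column numbering by
   merging the diagonal-block (A) and off-diagonal-block (B) entries of that row:
   B entries whose global column (looked up through garray) lies before the diagonal
   block come first, then the A entries shifted by cstart, then the remaining B
   entries.  Workspace sized for the longest local row is allocated on first use and
   kept until the matrix is destroyed.
*/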
1858 #undef __FUNCT__
1859 #define __FUNCT__ "MatGetRow_MPIAIJ"
1860 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1861 {
1862   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1863   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1864   PetscErrorCode ierr;
1865   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1866   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1867   PetscInt       *cmap,*idx_p;
1868 
1869   PetscFunctionBegin;
1870   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1871   mat->getrowactive = PETSC_TRUE;
1872 
1873   if (!mat->rowvalues && (idx || v)) {
1874     /*
1875         allocate enough space to hold information from the longest row.
1876     */
1877     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1878     PetscInt   max = 1,tmp;
1879     for (i=0; i<matin->rmap->n; i++) {
1880       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1881       if (max < tmp) { max = tmp; }
1882     }
1883     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1884   }
1885 
1886   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1887   lrow = row - rstart;
1888 
1889   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1890   if (!v)   {pvA = 0; pvB = 0;}
1891   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1892   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1893   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1894   nztot = nzA + nzB;
1895 
1896   cmap  = mat->garray;
1897   if (v  || idx) {
1898     if (nztot) {
1899       /* Sort by increasing column numbers, assuming A and B already sorted */
1900       PetscInt imark = -1;
1901       if (v) {
1902         *v = v_p = mat->rowvalues;
1903         for (i=0; i<nzB; i++) {
1904           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1905           else break;
1906         }
1907         imark = i;
1908         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1909         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1910       }
1911       if (idx) {
1912         *idx = idx_p = mat->rowindices;
1913         if (imark > -1) {
1914           for (i=0; i<imark; i++) {
1915             idx_p[i] = cmap[cworkB[i]];
1916           }
1917         } else {
1918           for (i=0; i<nzB; i++) {
1919             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1920             else break;
1921           }
1922           imark = i;
1923         }
1924         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1925         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1926       }
1927     } else {
1928       if (idx) *idx = 0;
1929       if (v)   *v   = 0;
1930     }
1931   }
1932   *nz = nztot;
1933   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1934   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1935   PetscFunctionReturn(0);
1936 }
1937 
1938 #undef __FUNCT__
1939 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1940 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1941 {
1942   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1943 
1944   PetscFunctionBegin;
1945   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1946   aij->getrowactive = PETSC_FALSE;
1947   PetscFunctionReturn(0);
1948 }
1949 
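/*
   MatNorm_MPIAIJ - The Frobenius norm sums the squares of the local A and B entries
   and reduces with MPIU_SUM before taking the square root; the 1-norm accumulates
   per-column absolute sums into a work array of global length and reduces with
   MPIU_SUM, then takes the maximum; the infinity norm reduces the per-row absolute
   sums with MPIU_MAX.  The 2-norm is not supported in parallel.
*/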
1950 #undef __FUNCT__
1951 #define __FUNCT__ "MatNorm_MPIAIJ"
1952 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1953 {
1954   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1955   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1956   PetscErrorCode ierr;
1957   PetscInt       i,j,cstart = mat->cmap->rstart;
1958   PetscReal      sum = 0.0;
1959   MatScalar      *v;
1960 
1961   PetscFunctionBegin;
1962   if (aij->size == 1) {
1963     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1964   } else {
1965     if (type == NORM_FROBENIUS) {
1966       v = amat->a;
1967       for (i=0; i<amat->nz; i++) {
1968 #if defined(PETSC_USE_COMPLEX)
1969         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1970 #else
1971         sum += (*v)*(*v); v++;
1972 #endif
1973       }
1974       v = bmat->a;
1975       for (i=0; i<bmat->nz; i++) {
1976 #if defined(PETSC_USE_COMPLEX)
1977         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1978 #else
1979         sum += (*v)*(*v); v++;
1980 #endif
1981       }
1982       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1983       *norm = PetscSqrtReal(*norm);
1984     } else if (type == NORM_1) { /* max column norm */
1985       PetscReal *tmp,*tmp2;
1986       PetscInt  *jj,*garray = aij->garray;
1987       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1988       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1989       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1990       *norm = 0.0;
1991       v = amat->a; jj = amat->j;
1992       for (j=0; j<amat->nz; j++) {
1993         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1994       }
1995       v = bmat->a; jj = bmat->j;
1996       for (j=0; j<bmat->nz; j++) {
1997         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1998       }
1999       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2000       for (j=0; j<mat->cmap->N; j++) {
2001         if (tmp2[j] > *norm) *norm = tmp2[j];
2002       }
2003       ierr = PetscFree(tmp);CHKERRQ(ierr);
2004       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2005     } else if (type == NORM_INFINITY) { /* max row norm */
2006       PetscReal ntemp = 0.0;
2007       for (j=0; j<aij->A->rmap->n; j++) {
2008         v = amat->a + amat->i[j];
2009         sum = 0.0;
2010         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2011           sum += PetscAbsScalar(*v); v++;
2012         }
2013         v = bmat->a + bmat->i[j];
2014         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2015           sum += PetscAbsScalar(*v); v++;
2016         }
2017         if (sum > ntemp) ntemp = sum;
2018       }
2019       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2020     } else {
2021       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2022     }
2023   }
2024   PetscFunctionReturn(0);
2025 }
2026 
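/*
   MatTranspose_MPIAIJ - Builds the transpose by inserting each local row of the A and
   B blocks as a column of the result with MatSetValues().  For MAT_INITIAL_MATRIX the
   column counts of the diagonal block give the d_nnz preallocation (and also serve as
   an estimate for the off-diagonal part, avoiding communication); for an in-place
   transpose the assembled result replaces A through MatHeaderMerge().
*/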
2027 #undef __FUNCT__
2028 #define __FUNCT__ "MatTranspose_MPIAIJ"
2029 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2030 {
2031   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2032   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2033   PetscErrorCode ierr;
2034   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2035   PetscInt       cstart=A->cmap->rstart,ncol;
2036   Mat            B;
2037   MatScalar      *array;
2038 
2039   PetscFunctionBegin;
2040   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2041 
2042   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2043   ai = Aloc->i; aj = Aloc->j;
2044   bi = Bloc->i; bj = Bloc->j;
2045   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2046     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2047     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2048     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2049     for (i=0; i<ai[ma]; i++){
2050       d_nnz[aj[i]] ++;
2051       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2052     }
2053 
2054     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2055     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2056     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2057     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2058     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2059     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2060     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2061   } else {
2062     B = *matout;
2063     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2064     for (i=0; i<ai[ma]; i++){
2065       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2066     }
2067   }
2068 
2069   /* copy over the A part */
2070   array = Aloc->a;
2071   row = A->rmap->rstart;
2072   for (i=0; i<ma; i++) {
2073     ncol = ai[i+1]-ai[i];
2074     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2075     row++; array += ncol; aj += ncol;
2076   }
2077   aj = Aloc->j;
2078   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
2079 
2080   /* copy over the B part */
2081   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2082   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2083   array = Bloc->a;
2084   row = A->rmap->rstart;
2085   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2086   cols_tmp = cols;
2087   for (i=0; i<mb; i++) {
2088     ncol = bi[i+1]-bi[i];
2089     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2090     row++; array += ncol; cols_tmp += ncol;
2091   }
2092   ierr = PetscFree(cols);CHKERRQ(ierr);
2093 
2094   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2095   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2096   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2097     *matout = B;
2098   } else {
2099     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2100   }
2101   PetscFunctionReturn(0);
2102 }
2103 
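/*
   MatDiagonalScale_MPIAIJ - Computes diag(ll)*mat*diag(rr).  The right scaling of the
   off-diagonal block needs the ghost values of rr, so the scatter into lvec is started
   first, the diagonal block and the left scaling of the off-diagonal block are handled
   while the messages are in flight, and the scatter is completed before the
   off-diagonal block is right-scaled.
*/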
2104 #undef __FUNCT__
2105 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2106 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2107 {
2108   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2109   Mat            a = aij->A,b = aij->B;
2110   PetscErrorCode ierr;
2111   PetscInt       s1,s2,s3;
2112 
2113   PetscFunctionBegin;
2114   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2115   if (rr) {
2116     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2117     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2118     /* Overlap communication with computation. */
2119     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2120   }
2121   if (ll) {
2122     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2123     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2124     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2125   }
2126   /* scale the diagonal block */
2127   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2128 
2129   if (rr) {
2130     /* Do a scatter end and then right scale the off-diagonal block */
2131     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2132     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2133   }
2134 
2135   PetscFunctionReturn(0);
2136 }
2137 
2138 #undef __FUNCT__
2139 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2140 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2141 {
2142   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2143   PetscErrorCode ierr;
2144 
2145   PetscFunctionBegin;
2146   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 #undef __FUNCT__
2151 #define __FUNCT__ "MatEqual_MPIAIJ"
2152 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2153 {
2154   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2155   Mat            a,b,c,d;
2156   PetscBool      flg;
2157   PetscErrorCode ierr;
2158 
2159   PetscFunctionBegin;
2160   a = matA->A; b = matA->B;
2161   c = matB->A; d = matB->B;
2162 
2163   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2164   if (flg) {
2165     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2166   }
2167   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2168   PetscFunctionReturn(0);
2169 }
2170 
2171 #undef __FUNCT__
2172 #define __FUNCT__ "MatCopy_MPIAIJ"
2173 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2174 {
2175   PetscErrorCode ierr;
2176   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2177   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2178 
2179   PetscFunctionBegin;
2180   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2181   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2182     /* because of the column compression in the off-processor part of the matrix a->B,
2183        the number of columns in a->B and b->B may be different, hence we cannot call
2184        the MatCopy() directly on the two parts. If need be, we can provide a more
2185        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2186        then copying the submatrices */
2187     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2188   } else {
2189     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2190     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2191   }
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatSetUp_MPIAIJ"
2197 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2198 {
2199   PetscErrorCode ierr;
2200 
2201   PetscFunctionBegin;
2202   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2203   PetscFunctionReturn(0);
2204 }
2205 
2206 #undef __FUNCT__
2207 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2208 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2209 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2210 {
2211   PetscInt          i,m=Y->rmap->N;
2212   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2213   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2214   const PetscInt    *xi = x->i,*yi = y->i;
2215 
2216   PetscFunctionBegin;
2217   /* Set the number of nonzeros in the new matrix */
2218   for (i=0; i<m; i++) {
2219     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2220     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2221     nnz[i] = 0;
2222     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2223       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2224       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2225       nnz[i]++;
2226     }
2227     for (; k<nzy; k++) nnz[i]++;
2228   }
2229   PetscFunctionReturn(0);
2230 }
2231 
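/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.  With SAME_NONZERO_PATTERN the stored values
   of the A and B blocks are combined directly with BLAS axpy; with
   SUBSET_NONZERO_PATTERN a cached x-to-y index map scatters X's off-diagonal entries
   into Y; otherwise a new matrix with the union sparsity pattern is preallocated,
   filled by the basic implementation, and swapped into Y with MatHeaderReplace().
*/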
2232 #undef __FUNCT__
2233 #define __FUNCT__ "MatAXPY_MPIAIJ"
2234 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2235 {
2236   PetscErrorCode ierr;
2237   PetscInt       i;
2238   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2239   PetscBLASInt   bnz,one=1;
2240   Mat_SeqAIJ     *x,*y;
2241 
2242   PetscFunctionBegin;
2243   if (str == SAME_NONZERO_PATTERN) {
2244     PetscScalar alpha = a;
2245     x = (Mat_SeqAIJ *)xx->A->data;
2246     y = (Mat_SeqAIJ *)yy->A->data;
2247     bnz = PetscBLASIntCast(x->nz);
2248     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2249     x = (Mat_SeqAIJ *)xx->B->data;
2250     y = (Mat_SeqAIJ *)yy->B->data;
2251     bnz = PetscBLASIntCast(x->nz);
2252     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2253   } else if (str == SUBSET_NONZERO_PATTERN) {
2254     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2255 
2256     x = (Mat_SeqAIJ *)xx->B->data;
2257     y = (Mat_SeqAIJ *)yy->B->data;
2258     if (y->xtoy && y->XtoY != xx->B) {
2259       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2260       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2261     }
2262     if (!y->xtoy) { /* get xtoy */
2263       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2264       y->XtoY = xx->B;
2265       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2266     }
2267     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2268   } else {
2269     Mat B;
2270     PetscInt *nnz_d,*nnz_o;
2271     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2272     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2273     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2274     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2275     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2276     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2277     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2278     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2279     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2280     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2281     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2282     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2283     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2284     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2285   }
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatConjugate_MPIAIJ"
2293 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2294 {
2295 #if defined(PETSC_USE_COMPLEX)
2296   PetscErrorCode ierr;
2297   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2298 
2299   PetscFunctionBegin;
2300   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2301   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2302 #else
2303   PetscFunctionBegin;
2304 #endif
2305   PetscFunctionReturn(0);
2306 }
2307 
2308 #undef __FUNCT__
2309 #define __FUNCT__ "MatRealPart_MPIAIJ"
2310 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2311 {
2312   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2313   PetscErrorCode ierr;
2314 
2315   PetscFunctionBegin;
2316   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2317   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 #undef __FUNCT__
2322 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2323 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2324 {
2325   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2326   PetscErrorCode ierr;
2327 
2328   PetscFunctionBegin;
2329   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2330   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2331   PetscFunctionReturn(0);
2332 }
2333 
2334 #if defined(PETSC_HAVE_PBGL)
2335 
2336 #include <boost/parallel/mpi/bsp_process_group.hpp>
2337 #include <boost/graph/distributed/ilu_default_graph.hpp>
2338 #include <boost/graph/distributed/ilu_0_block.hpp>
2339 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2340 #include <boost/graph/distributed/petsc/interface.hpp>
2341 #include <boost/multi_array.hpp>
2342 #include <boost/parallel/distributed_property_map.hpp>
2343 
2344 #undef __FUNCT__
2345 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2346 /*
2347   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2348 */
2349 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2350 {
2351   namespace petsc = boost::distributed::petsc;
2352 
2353   namespace graph_dist = boost::graph::distributed;
2354   using boost::graph::distributed::ilu_default::process_group_type;
2355   using boost::graph::ilu_permuted;
2356 
2357   PetscBool       row_identity, col_identity;
2358   PetscContainer  c;
2359   PetscInt        m, n, M, N;
2360   PetscErrorCode  ierr;
2361 
2362   PetscFunctionBegin;
2363   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2364   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2365   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2366   if (!row_identity || !col_identity) {
2367     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2368   }
2369 
2370   process_group_type pg;
2371   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2372   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2373   lgraph_type&   level_graph = *lgraph_p;
2374   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2375 
2376   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2377   ilu_permuted(level_graph);
2378 
2379   /* put together the new matrix */
2380   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2381   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2382   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2383   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2384   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2385   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2386   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2387   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2388 
2389   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2390   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2391   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2392   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2398 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2399 {
2400   PetscFunctionBegin;
2401   PetscFunctionReturn(0);
2402 }
2403 
2404 #undef __FUNCT__
2405 #define __FUNCT__ "MatSolve_MPIAIJ"
2406 /*
2407   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2408 */
2409 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2410 {
2411   namespace graph_dist = boost::graph::distributed;
2412 
2413   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2414   lgraph_type*   lgraph_p;
2415   PetscContainer c;
2416   PetscErrorCode ierr;
2417 
2418   PetscFunctionBegin;
2419   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2420   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2421   ierr = VecCopy(b, x);CHKERRQ(ierr);
2422 
2423   PetscScalar* array_x;
2424   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2425   PetscInt sx;
2426   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2427 
2428   PetscScalar* array_b;
2429   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2430   PetscInt sb;
2431   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2432 
2433   lgraph_type&   level_graph = *lgraph_p;
2434   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2435 
2436   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2437   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2438                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2439 
2440   typedef boost::iterator_property_map<array_ref_type::iterator,
2441                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2442   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2443                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2444 
2445   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2446 
2447   PetscFunctionReturn(0);
2448 }
2449 #endif
2450 
2451 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2452   PetscInt       nzlocal,nsends,nrecvs;
2453   PetscMPIInt    *send_rank,*recv_rank;
2454   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2455   PetscScalar    *sbuf_a,**rbuf_a;
2456   PetscErrorCode (*Destroy)(Mat);
2457 } Mat_Redundant;
2458 
2459 #undef __FUNCT__
2460 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2461 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2462 {
2463   PetscErrorCode       ierr;
2464   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2465   PetscInt             i;
2466 
2467   PetscFunctionBegin;
2468   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2469   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2470   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2471   for (i=0; i<redund->nrecvs; i++){
2472     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2473     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2474   }
2475   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2476   ierr = PetscFree(redund);CHKERRQ(ierr);
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 #undef __FUNCT__
2481 #define __FUNCT__ "MatDestroy_MatRedundant"
2482 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2483 {
2484   PetscErrorCode  ierr;
2485   PetscContainer  container;
2486   Mat_Redundant   *redund=PETSC_NULL;
2487 
2488   PetscFunctionBegin;
2489   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2490   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2491   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2492   A->ops->destroy = redund->Destroy;
2493   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2494   if (A->ops->destroy) {
2495     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2496   }
2497   PetscFunctionReturn(0);
2498 }
2499 
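/*
   MatGetRedundantMatrix_MPIAIJ - Builds a copy of the whole matrix on each
   subcommunicator.  Every process packs its local rows in CSR form (row pointers and
   global column indices in sbuf_j, values in sbuf_a) and exchanges them with its
   counterpart processes in the other subcommunicators, with leftover processes folded
   in separately.  With MAT_REUSE_MATRIX the communication pattern and index buffers
   cached in the Mat_Redundant container are reused and only the values are resent.
*/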
2500 #undef __FUNCT__
2501 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2502 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2503 {
2504   PetscMPIInt    rank,size;
2505   MPI_Comm       comm=((PetscObject)mat)->comm;
2506   PetscErrorCode ierr;
2507   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2508   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2509   PetscInt       *rowrange=mat->rmap->range;
2510   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2511   Mat            A=aij->A,B=aij->B,C=*matredundant;
2512   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2513   PetscScalar    *sbuf_a;
2514   PetscInt       nzlocal=a->nz+b->nz;
2515   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2516   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2517   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2518   MatScalar      *aworkA,*aworkB;
2519   PetscScalar    *vals;
2520   PetscMPIInt    tag1,tag2,tag3,imdex;
2521   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2522                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2523   MPI_Status     recv_status,*send_status;
2524   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2525   PetscInt       **rbuf_j=PETSC_NULL;
2526   PetscScalar    **rbuf_a=PETSC_NULL;
2527   Mat_Redundant  *redund=PETSC_NULL;
2528   PetscContainer container;
2529 
2530   PetscFunctionBegin;
2531   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2532   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2533 
2534   if (reuse == MAT_REUSE_MATRIX) {
2535     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2536     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2537     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2538     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2539     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2540     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2541     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2542     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2543 
2544     nsends    = redund->nsends;
2545     nrecvs    = redund->nrecvs;
2546     send_rank = redund->send_rank;
2547     recv_rank = redund->recv_rank;
2548     sbuf_nz   = redund->sbuf_nz;
2549     rbuf_nz   = redund->rbuf_nz;
2550     sbuf_j    = redund->sbuf_j;
2551     sbuf_a    = redund->sbuf_a;
2552     rbuf_j    = redund->rbuf_j;
2553     rbuf_a    = redund->rbuf_a;
2554   }
2555 
2556   if (reuse == MAT_INITIAL_MATRIX){
2557     PetscMPIInt  subrank,subsize;
2558     PetscInt     nleftover,np_subcomm;
2559     /* get the destination processors' id send_rank, nsends and nrecvs */
2560     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2561     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2562     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2563     np_subcomm = size/nsubcomm;
2564     nleftover  = size - nsubcomm*np_subcomm;
2565     nsends = 0; nrecvs = 0;
2566     for (i=0; i<size; i++){ /* i = rank */
2567       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2568         send_rank[nsends] = i; nsends++;
2569         recv_rank[nrecvs++] = i;
2570       }
2571     }
2572     if (rank >= size - nleftover){/* this proc is a leftover processor */
2573       i = size-nleftover-1;
2574       j = 0;
2575       while (j < nsubcomm - nleftover){
2576         send_rank[nsends++] = i;
2577         i--; j++;
2578       }
2579     }
2580 
2581     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2582       for (i=0; i<nleftover; i++){
2583         recv_rank[nrecvs++] = size-nleftover+i;
2584       }
2585     }
2586 
2587     /* allocate sbuf_j, sbuf_a */
2588     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2589     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2590     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2591   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2592 
2593   /* copy mat's local entries into the buffers */
2594   if (reuse == MAT_INITIAL_MATRIX){
2595     rownz_max = 0;
2596     rptr = sbuf_j;
2597     cols = sbuf_j + rend-rstart + 1;
2598     vals = sbuf_a;
2599     rptr[0] = 0;
2600     for (i=0; i<rend-rstart; i++){
2601       row = i + rstart;
2602       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2603       ncols  = nzA + nzB;
2604       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2605       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2606       /* load the column indices for this row into cols */
2607       lwrite = 0;
2608       for (l=0; l<nzB; l++) {
2609         if ((ctmp = bmap[cworkB[l]]) < cstart){
2610           vals[lwrite]   = aworkB[l];
2611           cols[lwrite++] = ctmp;
2612         }
2613       }
2614       for (l=0; l<nzA; l++){
2615         vals[lwrite]   = aworkA[l];
2616         cols[lwrite++] = cstart + cworkA[l];
2617       }
2618       for (l=0; l<nzB; l++) {
2619         if ((ctmp = bmap[cworkB[l]]) >= cend){
2620           vals[lwrite]   = aworkB[l];
2621           cols[lwrite++] = ctmp;
2622         }
2623       }
2624       vals += ncols;
2625       cols += ncols;
2626       rptr[i+1] = rptr[i] + ncols;
2627       if (rownz_max < ncols) rownz_max = ncols;
2628     }
2629     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2630   } else { /* only copy matrix values into sbuf_a */
2631     rptr = sbuf_j;
2632     vals = sbuf_a;
2633     rptr[0] = 0;
2634     for (i=0; i<rend-rstart; i++){
2635       row = i + rstart;
2636       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2637       ncols  = nzA + nzB;
2638       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2639       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2640       lwrite = 0;
2641       for (l=0; l<nzB; l++) {
2642         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2643       }
2644       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2645       for (l=0; l<nzB; l++) {
2646         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2647       }
2648       vals += ncols;
2649       rptr[i+1] = rptr[i] + ncols;
2650     }
2651   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2652 
2653   /* send nzlocal to others, and recv others' nzlocal */
2654   /*--------------------------------------------------*/
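       /* Three-phase exchange: tag1 carries each sender's nzlocal count, tag2 the packed row
          pointers plus column indices (sbuf_j), and tag3 the numerical values (sbuf_a).
          When the matrix is reused only the tag3 value exchange is repeated. */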
2655   if (reuse == MAT_INITIAL_MATRIX){
2656     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2657     s_waits2 = s_waits3 + nsends;
2658     s_waits1 = s_waits2 + nsends;
2659     r_waits1 = s_waits1 + nsends;
2660     r_waits2 = r_waits1 + nrecvs;
2661     r_waits3 = r_waits2 + nrecvs;
2662   } else {
2663     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2664     r_waits3 = s_waits3 + nsends;
2665   }
2666 
2667   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2668   if (reuse == MAT_INITIAL_MATRIX){
2669     /* get new tags to keep the communication clean */
2670     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2671     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2672     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2673 
2674     /* post receives of other's nzlocal */
2675     for (i=0; i<nrecvs; i++){
2676       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2677     }
2678     /* send nzlocal to others */
2679     for (i=0; i<nsends; i++){
2680       sbuf_nz[i] = nzlocal;
2681       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2682     }
2683     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2684     count = nrecvs;
2685     while (count) {
2686       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2687       recv_rank[imdex] = recv_status.MPI_SOURCE;
2688       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2689       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2690 
2691       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2692       rbuf_nz[imdex] += i + 2;
2693       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2694       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2695       count--;
2696     }
2697     /* wait on sends of nzlocal */
2698     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2699     /* send mat->i,j to others, and recv from others */
2700     /*------------------------------------------------*/
2701     for (i=0; i<nsends; i++){
2702       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2703       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2704     }
2705     /* wait on receives of mat->i,j */
2706     /*------------------------------*/
2707     count = nrecvs;
2708     while (count) {
2709       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2710       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2711       count--;
2712     }
2713     /* wait on sends of mat->i,j */
2714     /*---------------------------*/
2715     if (nsends) {
2716       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2717     }
2718   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2719 
2720   /* post receives, send and receive mat->a */
2721   /*----------------------------------------*/
2722   for (imdex=0; imdex<nrecvs; imdex++) {
2723     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2724   }
2725   for (i=0; i<nsends; i++){
2726     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2727   }
2728   count = nrecvs;
2729   while (count) {
2730     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2731     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2732     count--;
2733   }
2734   if (nsends) {
2735     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2736   }
2737 
2738   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2739 
2740   /* create redundant matrix */
2741   /*-------------------------*/
2742   if (reuse == MAT_INITIAL_MATRIX){
2743     /* compute rownz_max for preallocation */
2744     for (imdex=0; imdex<nrecvs; imdex++){
2745       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2746       rptr = rbuf_j[imdex];
2747       for (i=0; i<j; i++){
2748         ncols = rptr[i+1] - rptr[i];
2749         if (rownz_max < ncols) rownz_max = ncols;
2750       }
2751     }
2752 
2753     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2754     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2755     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2756     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2757     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2758     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2759   } else {
2760     C = *matredundant;
2761   }
2762 
2763   /* insert local matrix entries */
2764   rptr = sbuf_j;
2765   cols = sbuf_j + rend-rstart + 1;
2766   vals = sbuf_a;
2767   for (i=0; i<rend-rstart; i++){
2768     row   = i + rstart;
2769     ncols = rptr[i+1] - rptr[i];
2770     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2771     vals += ncols;
2772     cols += ncols;
2773   }
2774   /* insert received matrix entries */
2775   for (imdex=0; imdex<nrecvs; imdex++){
2776     rstart = rowrange[recv_rank[imdex]];
2777     rend   = rowrange[recv_rank[imdex]+1];
2778     rptr = rbuf_j[imdex];
2779     cols = rbuf_j[imdex] + rend-rstart + 1;
2780     vals = rbuf_a[imdex];
2781     for (i=0; i<rend-rstart; i++){
2782       row   = i + rstart;
2783       ncols = rptr[i+1] - rptr[i];
2784       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2785       vals += ncols;
2786       cols += ncols;
2787     }
2788   }
2789   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2790   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2791   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2792   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2793   if (reuse == MAT_INITIAL_MATRIX) {
2794     PetscContainer container;
2795     *matredundant = C;
2796     /* create a supporting struct and attach it to C for reuse */
2797     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2798     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2799     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2800     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2801     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2802     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2803 
2804     redund->nzlocal = nzlocal;
2805     redund->nsends  = nsends;
2806     redund->nrecvs  = nrecvs;
2807     redund->send_rank = send_rank;
2808     redund->recv_rank = recv_rank;
2809     redund->sbuf_nz = sbuf_nz;
2810     redund->rbuf_nz = rbuf_nz;
2811     redund->sbuf_j  = sbuf_j;
2812     redund->sbuf_a  = sbuf_a;
2813     redund->rbuf_j  = rbuf_j;
2814     redund->rbuf_a  = rbuf_a;
2815 
2816     redund->Destroy = C->ops->destroy;
2817     C->ops->destroy = MatDestroy_MatRedundant;
2818   }
2819   PetscFunctionReturn(0);
2820 }
2821 
2822 #undef __FUNCT__
2823 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2824 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2825 {
2826   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2827   PetscErrorCode ierr;
2828   PetscInt       i,*idxb = 0;
2829   PetscScalar    *va,*vb;
2830   Vec            vtmp;
2831 
2832   PetscFunctionBegin;
2833   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2834   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2835   if (idx) {
2836     for (i=0; i<A->rmap->n; i++) {
2837       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2838     }
2839   }
2840 
2841   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2842   if (idx) {
2843     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2844   }
2845   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2846   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2847 
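       /* A row's largest-magnitude entry may lie in the off-diagonal block B: keep whichever
          candidate is larger and map its local column index to a global one through garray. */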
2848   for (i=0; i<A->rmap->n; i++){
2849     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2850       va[i] = vb[i];
2851       if (idx) idx[i] = a->garray[idxb[i]];
2852     }
2853   }
2854 
2855   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2856   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2857   ierr = PetscFree(idxb);CHKERRQ(ierr);
2858   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2859   PetscFunctionReturn(0);
2860 }
2861 
2862 #undef __FUNCT__
2863 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2864 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2865 {
2866   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2867   PetscErrorCode ierr;
2868   PetscInt       i,*idxb = 0;
2869   PetscScalar    *va,*vb;
2870   Vec            vtmp;
2871 
2872   PetscFunctionBegin;
2873   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2874   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2875   if (idx) {
2876     for (i=0; i<A->rmap->n; i++) {
2877       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2878     }
2879   }
2880 
2881   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2882   if (idx) {
2883     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2884   }
2885   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2886   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2887 
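       /* Same merge as in MatGetRowMaxAbs_MPIAIJ(), but keep the smaller-magnitude candidate. */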
2888   for (i=0; i<A->rmap->n; i++){
2889     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2890       va[i] = vb[i];
2891       if (idx) idx[i] = a->garray[idxb[i]];
2892     }
2893   }
2894 
2895   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2896   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2897   ierr = PetscFree(idxb);CHKERRQ(ierr);
2898   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2899   PetscFunctionReturn(0);
2900 }
2901 
2902 #undef __FUNCT__
2903 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2904 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2905 {
2906   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2907   PetscInt       n      = A->rmap->n;
2908   PetscInt       cstart = A->cmap->rstart;
2909   PetscInt      *cmap   = mat->garray;
2910   PetscInt      *diagIdx, *offdiagIdx;
2911   Vec            diagV, offdiagV;
2912   PetscScalar   *a, *diagA, *offdiagA;
2913   PetscInt       r;
2914   PetscErrorCode ierr;
2915 
2916   PetscFunctionBegin;
2917   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2918   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2919   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2920   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2921   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2922   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2923   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2924   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2925   for(r = 0; r < n; ++r) {
2926     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2927       a[r]   = diagA[r];
2928       idx[r] = cstart + diagIdx[r];
2929     } else {
2930       a[r]   = offdiagA[r];
2931       idx[r] = cmap[offdiagIdx[r]];
2932     }
2933   }
2934   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2935   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2936   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2937   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2938   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2939   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2940   PetscFunctionReturn(0);
2941 }
2942 
2943 #undef __FUNCT__
2944 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2945 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2946 {
2947   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2948   PetscInt       n      = A->rmap->n;
2949   PetscInt       cstart = A->cmap->rstart;
2950   PetscInt      *cmap   = mat->garray;
2951   PetscInt      *diagIdx, *offdiagIdx;
2952   Vec            diagV, offdiagV;
2953   PetscScalar   *a, *diagA, *offdiagA;
2954   PetscInt       r;
2955   PetscErrorCode ierr;
2956 
2957   PetscFunctionBegin;
2958   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2959   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2960   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2961   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2962   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2963   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2964   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2965   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2966   for(r = 0; r < n; ++r) {
2967     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2968       a[r]   = diagA[r];
2969       idx[r] = cstart + diagIdx[r];
2970     } else {
2971       a[r]   = offdiagA[r];
2972       idx[r] = cmap[offdiagIdx[r]];
2973     }
2974   }
2975   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2976   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2977   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2978   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2979   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2980   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2981   PetscFunctionReturn(0);
2982 }
2983 
2984 #undef __FUNCT__
2985 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2986 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2987 {
2988   PetscErrorCode ierr;
2989   Mat            *dummy;
2990 
2991   PetscFunctionBegin;
2992   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2993   *newmat = *dummy;
2994   ierr = PetscFree(dummy);CHKERRQ(ierr);
2995   PetscFunctionReturn(0);
2996 }
2997 
2998 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2999 
3000 #undef __FUNCT__
3001 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3002 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3003 {
3004   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3005   PetscErrorCode ierr;
3006 
3007   PetscFunctionBegin;
3008   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3009   PetscFunctionReturn(0);
3010 }
3011 
3012 
3013 /* -------------------------------------------------------------------*/
3014 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3015        MatGetRow_MPIAIJ,
3016        MatRestoreRow_MPIAIJ,
3017        MatMult_MPIAIJ,
3018 /* 4*/ MatMultAdd_MPIAIJ,
3019        MatMultTranspose_MPIAIJ,
3020        MatMultTransposeAdd_MPIAIJ,
3021 #ifdef PETSC_HAVE_PBGL
3022        MatSolve_MPIAIJ,
3023 #else
3024        0,
3025 #endif
3026        0,
3027        0,
3028 /*10*/ 0,
3029        0,
3030        0,
3031        MatSOR_MPIAIJ,
3032        MatTranspose_MPIAIJ,
3033 /*15*/ MatGetInfo_MPIAIJ,
3034        MatEqual_MPIAIJ,
3035        MatGetDiagonal_MPIAIJ,
3036        MatDiagonalScale_MPIAIJ,
3037        MatNorm_MPIAIJ,
3038 /*20*/ MatAssemblyBegin_MPIAIJ,
3039        MatAssemblyEnd_MPIAIJ,
3040        MatSetOption_MPIAIJ,
3041        MatZeroEntries_MPIAIJ,
3042 /*24*/ MatZeroRows_MPIAIJ,
3043        0,
3044 #ifdef PETSC_HAVE_PBGL
3045        0,
3046 #else
3047        0,
3048 #endif
3049        0,
3050        0,
3051 /*29*/ MatSetUp_MPIAIJ,
3052 #ifdef PETSC_HAVE_PBGL
3053        0,
3054 #else
3055        0,
3056 #endif
3057        0,
3058        0,
3059        0,
3060 /*34*/ MatDuplicate_MPIAIJ,
3061        0,
3062        0,
3063        0,
3064        0,
3065 /*39*/ MatAXPY_MPIAIJ,
3066        MatGetSubMatrices_MPIAIJ,
3067        MatIncreaseOverlap_MPIAIJ,
3068        MatGetValues_MPIAIJ,
3069        MatCopy_MPIAIJ,
3070 /*44*/ MatGetRowMax_MPIAIJ,
3071        MatScale_MPIAIJ,
3072        0,
3073        0,
3074        MatZeroRowsColumns_MPIAIJ,
3075 /*49*/ 0,
3076        0,
3077        0,
3078        0,
3079        0,
3080 /*54*/ MatFDColoringCreate_MPIAIJ,
3081        0,
3082        MatSetUnfactored_MPIAIJ,
3083        0, /* MatPermute_MPIAIJ, impl currently broken */
3084        0,
3085 /*59*/ MatGetSubMatrix_MPIAIJ,
3086        MatDestroy_MPIAIJ,
3087        MatView_MPIAIJ,
3088        0,
3089        0,
3090 /*64*/ 0,
3091        0,
3092        0,
3093        0,
3094        0,
3095 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3096        MatGetRowMinAbs_MPIAIJ,
3097        0,
3098        MatSetColoring_MPIAIJ,
3099 #if defined(PETSC_HAVE_ADIC)
3100        MatSetValuesAdic_MPIAIJ,
3101 #else
3102        0,
3103 #endif
3104        MatSetValuesAdifor_MPIAIJ,
3105 /*75*/ MatFDColoringApply_AIJ,
3106        0,
3107        0,
3108        0,
3109        MatFindZeroDiagonals_MPIAIJ,
3110 /*80*/ 0,
3111        0,
3112        0,
3113 /*83*/ MatLoad_MPIAIJ,
3114        0,
3115        0,
3116        0,
3117        0,
3118        0,
3119 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3120        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3121        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3122        MatPtAP_Basic,
3123        MatPtAPSymbolic_MPIAIJ,
3124 /*94*/ MatPtAPNumeric_MPIAIJ,
3125        0,
3126        0,
3127        0,
3128        0,
3129 /*99*/ 0,
3130        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3131        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3132        MatConjugate_MPIAIJ,
3133        0,
3134 /*104*/MatSetValuesRow_MPIAIJ,
3135        MatRealPart_MPIAIJ,
3136        MatImaginaryPart_MPIAIJ,
3137        0,
3138        0,
3139 /*109*/0,
3140        MatGetRedundantMatrix_MPIAIJ,
3141        MatGetRowMin_MPIAIJ,
3142        0,
3143        0,
3144 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3145        0,
3146        0,
3147        0,
3148        0,
3149 /*119*/0,
3150        0,
3151        0,
3152        0,
3153        MatGetMultiProcBlock_MPIAIJ,
3154 /*124*/MatFindNonzeroRows_MPIAIJ,
3155        MatGetColumnNorms_MPIAIJ,
3156        MatInvertBlockDiagonal_MPIAIJ,
3157        0,
3158        MatGetSubMatricesParallel_MPIAIJ,
3159 /*129*/0,
3160        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3161        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3162        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3163        0,
3164 /*134*/0,
3165        0,
3166        0,
3167        0,
3168        0
3169 };
3170 
3171 /* ----------------------------------------------------------------------------------------*/
3172 
3173 EXTERN_C_BEGIN
3174 #undef __FUNCT__
3175 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3176 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3177 {
3178   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3179   PetscErrorCode ierr;
3180 
3181   PetscFunctionBegin;
3182   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3183   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3184   PetscFunctionReturn(0);
3185 }
3186 EXTERN_C_END
3187 
3188 EXTERN_C_BEGIN
3189 #undef __FUNCT__
3190 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3191 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3192 {
3193   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3194   PetscErrorCode ierr;
3195 
3196   PetscFunctionBegin;
3197   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3198   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3199   PetscFunctionReturn(0);
3200 }
3201 EXTERN_C_END
3202 
3203 EXTERN_C_BEGIN
3204 #undef __FUNCT__
3205 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3206 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3207 {
3208   Mat_MPIAIJ     *b;
3209   PetscErrorCode ierr;
3210   PetscInt       i;
3211   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3212 
3213   PetscFunctionBegin;
3214   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3215   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3216   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3217   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3218   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3219   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3220 
3221   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3222   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3223   if (d_nnz) {
3224     for (i=0; i<B->rmap->n; i++) {
3225       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3226     }
3227   }
3228   if (o_nnz) {
3229     for (i=0; i<B->rmap->n; i++) {
3230       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3231     }
3232   }
3233   b = (Mat_MPIAIJ*)B->data;
3234 
3235   if (!B->preallocated) {
3236     /* Explicitly create the 2 MATSEQAIJ blocks: b->A stores the diagonal part (local rows x local columns), b->B the off-diagonal part (local rows x global columns). */
3237     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3238     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3239     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3240     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3241     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3242     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3243     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3244     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3245     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3246     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3247   }
3248 
3249   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3250   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3251   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3252   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3253   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3254   B->preallocated = PETSC_TRUE;
3255   PetscFunctionReturn(0);
3256 }
3257 EXTERN_C_END
3258 
3259 #undef __FUNCT__
3260 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3261 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3262 {
3263   Mat            mat;
3264   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3265   PetscErrorCode ierr;
3266 
3267   PetscFunctionBegin;
3268   *newmat       = 0;
3269   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3270   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3271   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3272   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3273   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3274   a    = (Mat_MPIAIJ*)mat->data;
3275 
3276   mat->factortype    = matin->factortype;
3277   mat->rmap->bs      = matin->rmap->bs;
3278   mat->cmap->bs      = matin->cmap->bs;
3279   mat->assembled    = PETSC_TRUE;
3280   mat->insertmode   = NOT_SET_VALUES;
3281   mat->preallocated = PETSC_TRUE;
3282 
3283   a->size           = oldmat->size;
3284   a->rank           = oldmat->rank;
3285   a->donotstash     = oldmat->donotstash;
3286   a->roworiented    = oldmat->roworiented;
3287   a->rowindices     = 0;
3288   a->rowvalues      = 0;
3289   a->getrowactive   = PETSC_FALSE;
3290 
3291   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3292   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3293 
3294   if (oldmat->colmap) {
3295 #if defined (PETSC_USE_CTABLE)
3296     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3297 #else
3298     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3299     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3300     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3301 #endif
3302   } else a->colmap = 0;
3303   if (oldmat->garray) {
3304     PetscInt len;
3305     len  = oldmat->B->cmap->n;
3306     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3307     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3308     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3309   } else a->garray = 0;
3310 
3311   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3312   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3313   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3314   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3315   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3316   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3317   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3318   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3319   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3320   *newmat = mat;
3321   PetscFunctionReturn(0);
3322 }
3323 
3324 
3325 
3326 #undef __FUNCT__
3327 #define __FUNCT__ "MatLoad_MPIAIJ"
3328 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3329 {
3330   PetscScalar    *vals,*svals;
3331   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3332   PetscErrorCode ierr;
3333   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3334   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3335   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3336   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3337   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3338   int            fd;
3339 
3340   PetscFunctionBegin;
3341   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3342   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3343   if (!rank) {
3344     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3345     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3346     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3347   }
3348 
3349   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3350 
3351   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3352   M = header[1]; N = header[2];
3353   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3354   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3355   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3356 
3357   /* If global sizes are set, check if they are consistent with that given in the file */
3358   if (sizesset) {
3359     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3360   }
3361   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3362   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3363 
3364   /* determine ownership of all rows */
3365   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3366   else m = newMat->rmap->n; /* Set by user */
3367 
3368   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3369   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3370 
3371   /* First process needs enough room for process with most rows */
3372   if (!rank) {
3373     mmax = rowners[1];
3374     for(i=2; i<=size; i++) {
3375       mmax = PetscMax(mmax, rowners[i]);
3376     }
3377   } else mmax = m;
3378 
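       /* turn the gathered local row counts into a prefix sum: rowners[r] becomes the first
          global row owned by process r */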
3379   rowners[0] = 0;
3380   for (i=2; i<=size; i++) {
3381     rowners[i] += rowners[i-1];
3382   }
3383   rstart = rowners[rank];
3384   rend   = rowners[rank+1];
3385 
3386   /* distribute row lengths to all processors */
3387   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3388   if (!rank) {
3389     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3390     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3391     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3392     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3393     for (j=0; j<m; j++) {
3394       procsnz[0] += ourlens[j];
3395     }
3396     for (i=1; i<size; i++) {
3397       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3398       /* calculate the number of nonzeros on each processor */
3399       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3400         procsnz[i] += rowlengths[j];
3401       }
3402       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3403     }
3404     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3405   } else {
3406     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3407   }
3408 
3409   if (!rank) {
3410     /* determine max buffer needed and allocate it */
3411     maxnz = 0;
3412     for (i=0; i<size; i++) {
3413       maxnz = PetscMax(maxnz,procsnz[i]);
3414     }
3415     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3416 
3417     /* read in my part of the matrix column indices  */
3418     nz   = procsnz[0];
3419     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3420     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3421 
3422     /* read in everyone else's part and ship it off */
3423     for (i=1; i<size; i++) {
3424       nz     = procsnz[i];
3425       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3426       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3427     }
3428     ierr = PetscFree(cols);CHKERRQ(ierr);
3429   } else {
3430     /* determine buffer space needed for message */
3431     nz = 0;
3432     for (i=0; i<m; i++) {
3433       nz += ourlens[i];
3434     }
3435     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3436 
3437     /* receive message of column indices*/
3438     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3439   }
3440 
3441   /* determine column ownership if matrix is not square */
3442   if (N != M) {
3443     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3444     else n = newMat->cmap->n;
3445     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3446     cstart = cend - n;
3447   } else {
3448     cstart = rstart;
3449     cend   = rend;
3450     n      = cend - cstart;
3451   }
3452 
3453   /* loop over local rows, determining number of off diagonal entries */
3454   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3455   jj = 0;
3456   for (i=0; i<m; i++) {
3457     for (j=0; j<ourlens[i]; j++) {
3458       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3459       jj++;
3460     }
3461   }
3462 
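       /* ourlens[i] - offlens[i] is the diagonal-block length of row i, used for preallocation;
          the full row length is restored right after MatMPIAIJSetPreallocation() for the
          value-insertion loops below */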
3463   for (i=0; i<m; i++) {
3464     ourlens[i] -= offlens[i];
3465   }
3466   if (!sizesset) {
3467     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3468   }
3469   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3470 
3471   for (i=0; i<m; i++) {
3472     ourlens[i] += offlens[i];
3473   }
3474 
3475   if (!rank) {
3476     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3477 
3478     /* read in my part of the matrix numerical values  */
3479     nz   = procsnz[0];
3480     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3481 
3482     /* insert into matrix */
3483     jj      = rstart;
3484     smycols = mycols;
3485     svals   = vals;
3486     for (i=0; i<m; i++) {
3487       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3488       smycols += ourlens[i];
3489       svals   += ourlens[i];
3490       jj++;
3491     }
3492 
3493     /* read in other processors and ship out */
3494     for (i=1; i<size; i++) {
3495       nz     = procsnz[i];
3496       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3497       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3498     }
3499     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3500   } else {
3501     /* receive numeric values */
3502     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3503 
3504     /* receive message of values*/
3505     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3506 
3507     /* insert into matrix */
3508     jj      = rstart;
3509     smycols = mycols;
3510     svals   = vals;
3511     for (i=0; i<m; i++) {
3512       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3513       smycols += ourlens[i];
3514       svals   += ourlens[i];
3515       jj++;
3516     }
3517   }
3518   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3519   ierr = PetscFree(vals);CHKERRQ(ierr);
3520   ierr = PetscFree(mycols);CHKERRQ(ierr);
3521   ierr = PetscFree(rowners);CHKERRQ(ierr);
3522 
3523   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3524   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3525   PetscFunctionReturn(0);
3526 }
3527 
3528 #undef __FUNCT__
3529 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3530 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3531 {
3532   PetscErrorCode ierr;
3533   IS             iscol_local;
3534   PetscInt       csize;
3535 
3536   PetscFunctionBegin;
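       /* gather the full (sequential) column IS onto every process; it is cached on the new
          matrix under "ISAllGather" so that MAT_REUSE_MATRIX calls can retrieve it below */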
3537   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3538   if (call == MAT_REUSE_MATRIX) {
3539     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3540     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3541   } else {
3542     PetscInt cbs;
3543     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3544     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3545     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3546   }
3547   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3548   if (call == MAT_INITIAL_MATRIX) {
3549     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3550     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3551   }
3552   PetscFunctionReturn(0);
3553 }
3554 
3555 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3556 #undef __FUNCT__
3557 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3558 /*
3559     Not great since it makes two copies of the submatrix: first a SeqAIJ
3560   on each process, and then the end result obtained by concatenating the local matrices.
3561   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3562 
3563   Note: This requires a sequential iscol with all indices.
3564 */
3565 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3566 {
3567   PetscErrorCode ierr;
3568   PetscMPIInt    rank,size;
3569   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3570   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3571   PetscBool      allcolumns, colflag;
3572   Mat            M,Mreuse;
3573   MatScalar      *vwork,*aa;
3574   MPI_Comm       comm = ((PetscObject)mat)->comm;
3575   Mat_SeqAIJ     *aij;
3576 
3577 
3578   PetscFunctionBegin;
3579   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3580   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3581 
3582   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3583   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3584   if (colflag && ncol == mat->cmap->N){
3585     allcolumns = PETSC_TRUE;
3586   } else {
3587     allcolumns = PETSC_FALSE;
3588   }
3589   if (call ==  MAT_REUSE_MATRIX) {
3590     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3591     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3592     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3593   } else {
3594     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3595   }
3596 
3597   /*
3598       m - number of local rows
3599       n - number of columns (same on all processors)
3600       rstart - first row in new global matrix generated
3601   */
3602   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3603   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3604   if (call == MAT_INITIAL_MATRIX) {
3605     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3606     ii  = aij->i;
3607     jj  = aij->j;
3608 
3609     /*
3610         Determine the number of non-zeros in the diagonal and off-diagonal
3611         portions of the matrix in order to do correct preallocation
3612     */
3613 
3614     /* first get start and end of "diagonal" columns */
3615     if (csize == PETSC_DECIDE) {
3616       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3617       if (mglobal == n) { /* square matrix */
3618         nlocal = m;
3619       } else {
3620         nlocal = n/size + ((n % size) > rank);
3621       }
3622     } else {
3623       nlocal = csize;
3624     }
3625     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3626     rstart = rend - nlocal;
3627     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3628 
3629     /* next, compute all the lengths */
3630     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3631     olens = dlens + m;
3632     for (i=0; i<m; i++) {
3633       jend = ii[i+1] - ii[i];
3634       olen = 0;
3635       dlen = 0;
3636       for (j=0; j<jend; j++) {
3637         if (*jj < rstart || *jj >= rend) olen++;
3638         else dlen++;
3639         jj++;
3640       }
3641       olens[i] = olen;
3642       dlens[i] = dlen;
3643     }
3644     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3645     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3646     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3647     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3648     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3649     ierr = PetscFree(dlens);CHKERRQ(ierr);
3650   } else {
3651     PetscInt ml,nl;
3652 
3653     M = *newmat;
3654     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3655     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3656     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3657     /*
3658          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3659        rather than the slower MatSetValues().
3660     */
3661     M->was_assembled = PETSC_TRUE;
3662     M->assembled     = PETSC_FALSE;
3663   }
3664   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3665   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3666   ii  = aij->i;
3667   jj  = aij->j;
3668   aa  = aij->a;
3669   for (i=0; i<m; i++) {
3670     row   = rstart + i;
3671     nz    = ii[i+1] - ii[i];
3672     cwork = jj;     jj += nz;
3673     vwork = aa;     aa += nz;
3674     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3675   }
3676 
3677   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3678   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3679   *newmat = M;
3680 
3681   /* save submatrix used in processor for next request */
3682   if (call ==  MAT_INITIAL_MATRIX) {
3683     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3684     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3685   }
3686 
3687   PetscFunctionReturn(0);
3688 }
3689 
3690 EXTERN_C_BEGIN
3691 #undef __FUNCT__
3692 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3693 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3694 {
3695   PetscInt       m,cstart, cend,j,nnz,i,d;
3696   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3697   const PetscInt *JJ;
3698   PetscScalar    *values;
3699   PetscErrorCode ierr;
3700 
3701   PetscFunctionBegin;
3702   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3703 
3704   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3705   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3706   m      = B->rmap->n;
3707   cstart = B->cmap->rstart;
3708   cend   = B->cmap->rend;
3709   rstart = B->rmap->rstart;
3710 
3711   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3712 
3713 #if defined(PETSC_USE_DEBUG)
3714   for (i=0; i<m; i++) {
3715     nnz     = Ii[i+1]- Ii[i];
3716     JJ      = J + Ii[i];
3717     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3718     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3719     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3720   }
3721 #endif
3722 
3723   for (i=0; i<m; i++) {
3724     nnz     = Ii[i+1]- Ii[i];
3725     JJ      = J + Ii[i];
3726     nnz_max = PetscMax(nnz_max,nnz);
3727     d       = 0;
3728     for (j=0; j<nnz; j++) {
3729       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3730     }
3731     d_nnz[i] = d;
3732     o_nnz[i] = nnz - d;
3733   }
3734   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3735   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3736 
3737   if (v) values = (PetscScalar*)v;
3738   else {
3739     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3740     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3741   }
3742 
3743   for (i=0; i<m; i++) {
3744     ii   = i + rstart;
3745     nnz  = Ii[i+1]- Ii[i];
3746     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3747   }
3748   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3749   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3750 
3751   if (!v) {
3752     ierr = PetscFree(values);CHKERRQ(ierr);
3753   }
3754   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3755   PetscFunctionReturn(0);
3756 }
3757 EXTERN_C_END
3758 
3759 #undef __FUNCT__
3760 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3761 /*@
3762    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3763    (the default parallel PETSc format).
3764 
3765    Collective on MPI_Comm
3766 
3767    Input Parameters:
3768 +  B - the matrix
3769 .  i - the indices into j for the start of each local row (starts with zero)
3770 .  j - the column indices for each local row (starts with zero)
3771 -  v - optional values in the matrix
3772 
3773    Level: developer
3774 
3775    Notes:
3776        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3777      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3778      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3779 
3780        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3781 
3782        The format which is used for the sparse matrix input is equivalent to a
3783     row-major ordering, i.e., for the following matrix the input data expected is
3784     as shown:
3785 
3786         1 0 0
3787         2 0 3     P0
3788        -------
3789         4 5 6     P1
3790 
3791      Process0 [P0]: rows_owned=[0,1]
3792         i =  {0,1,3}  [size = nrow+1  = 2+1]
3793         j =  {0,0,2}  [size = nz = 3]
3794         v =  {1,2,3}  [size = nz = 3]
3795 
3796      Process1 [P1]: rows_owned=[2]
3797         i =  {0,3}    [size = nrow+1  = 1+1]
3798         j =  {0,1,2}  [size = nz = 3]
3799         v =  {4,5,6}  [size = nz = 3]
3800 
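        A minimal calling sketch for process 0 of the example above; the communicator comm,
     the Mat B, and ierr are assumed context for illustration, not defined in this manual page
     (process 1 passes m = 1 and its own arrays):

     .vb
       PetscInt    i[] = {0,1,3};        /* row pointers for the 2 locally owned rows */
       PetscInt    j[] = {0,0,2};        /* global column indices, row by row */
       PetscScalar v[] = {1.0,2.0,3.0};  /* values in the same order as j */
       ierr = MatCreate(comm,&B);CHKERRQ(ierr);
       ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
       ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
     .ve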
3801 .keywords: matrix, aij, compressed row, sparse, parallel
3802 
3803 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3804           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3805 @*/
3806 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3807 {
3808   PetscErrorCode ierr;
3809 
3810   PetscFunctionBegin;
3811   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3812   PetscFunctionReturn(0);
3813 }
3814 
3815 #undef __FUNCT__
3816 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3817 /*@C
3818    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3819    (the default parallel PETSc format).  For good matrix assembly performance
3820    the user should preallocate the matrix storage by setting the parameters
3821    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3822    performance can be increased by more than a factor of 50.
3823 
3824    Collective on MPI_Comm
3825 
3826    Input Parameters:
3827 +  A - the matrix
3828 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3829            (same value is used for all local rows)
3830 .  d_nnz - array containing the number of nonzeros in the various rows of the
3831            DIAGONAL portion of the local submatrix (possibly different for each row)
3832            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3833            The size of this array is equal to the number of local rows, i.e 'm'.
3834            For matrices that will be factored, you must leave room for (and set)
3835            the diagonal entry even if it is zero.
3836 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3837            submatrix (same value is used for all local rows).
3838 -  o_nnz - array containing the number of nonzeros in the various rows of the
3839            OFF-DIAGONAL portion of the local submatrix (possibly different for
3840            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3841            structure. The size of this array is equal to the number
3842            of local rows, i.e 'm'.
3843 
3844    If the *_nnz parameter is given then the *_nz parameter is ignored
3845 
3846    The AIJ format (also called the Yale sparse matrix format or
3847    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3848    storage.  The stored row and column indices begin with zero.
3849    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3850 
3851    The parallel matrix is partitioned such that the first m0 rows belong to
3852    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3853    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3854 
3855    The DIAGONAL portion of the local submatrix of a processor can be defined
3856    as the submatrix which is obtained by extraction the part corresponding to
3857    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3858    first row that belongs to the processor, r2 is the last row belonging to
3859    the this processor, and c1-c2 is range of indices of the local part of a
3860    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3861    common case of a square matrix, the row and column ranges are the same and
3862    the DIAGONAL part is also square. The remaining portion of the local
3863    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3864 
3865    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3866 
3867    You can call MatGetInfo() to get information on how effective the preallocation was;
3868    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3869    You can also run with the option -info and look for messages with the string
3870    malloc in them to see if additional memory allocation was needed.
3871 
3872    Example usage:
3873 
3874    Consider the following 8x8 matrix with 34 non-zero values, that is
3875    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3876    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3877    as follows:
3878 
3879 .vb
3880             1  2  0  |  0  3  0  |  0  4
3881     Proc0   0  5  6  |  7  0  0  |  8  0
3882             9  0 10  | 11  0  0  | 12  0
3883     -------------------------------------
3884            13  0 14  | 15 16 17  |  0  0
3885     Proc1   0 18  0  | 19 20 21  |  0  0
3886             0  0  0  | 22 23  0  | 24  0
3887     -------------------------------------
3888     Proc2  25 26 27  |  0  0 28  | 29  0
3889            30  0  0  | 31 32 33  |  0 34
3890 .ve
3891 
3892    This can be represented as a collection of submatrices as:
3893 
3894 .vb
3895       A B C
3896       D E F
3897       G H I
3898 .ve
3899 
3900    Where the submatrices A,B,C are owned by proc0, D,E,F are
3901    owned by proc1, G,H,I are owned by proc2.
3902 
3903    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3904    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3905    The 'M','N' parameters are 8,8, and have the same values on all procs.
3906 
3907    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3908    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3909    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3910    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3911    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3912    matrix, and [DF] as another SeqAIJ matrix.
3913 
3914    When d_nz, o_nz parameters are specified, d_nz storage elements are
3915    allocated for every row of the local diagonal submatrix, and o_nz
3916    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3917    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3918    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3919    In this case, the values of d_nz,o_nz are:
3920 .vb
3921      proc0 : dnz = 2, o_nz = 2
3922      proc1 : dnz = 3, o_nz = 2
3923      proc2 : dnz = 1, o_nz = 4
3924 .ve
3925    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3926    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3927    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3928    34 values.
3929 
3930    When d_nnz, o_nnz parameters are specified, the storage is specified
3931    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3932    In the above case the values for d_nnz,o_nnz are:
3933 .vb
3934      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3935      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3936      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3937 .ve
3938    Here the space allocated is the sum of all the above values, i.e. 34, and
3939    hence pre-allocation is perfect.
3940 
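        As a sketch, process 0 in the example above could preallocate in either of the two
     ways below (the Mat A is assumed to have been created with type MATMPIAIJ and its local
     sizes already set; A and ierr are illustrative context, not defined here):

     .vb
       PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
       ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);            /* exact per-row counts */
       /* or, with a single estimate used for every local row: */
       ierr = MatMPIAIJSetPreallocation(A,2,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     .ve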
3941    Level: intermediate
3942 
3943 .keywords: matrix, aij, compressed row, sparse, parallel
3944 
3945 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3946           MPIAIJ, MatGetInfo()
3947 @*/
3948 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3949 {
3950   PetscErrorCode ierr;
3951 
3952   PetscFunctionBegin;
3953   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3954   PetscValidType(B,1);
3955   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3956   PetscFunctionReturn(0);
3957 }
3958 
3959 #undef __FUNCT__
3960 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3961 /*@
3962      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3963          CSR format the local rows.
3964 
3965    Collective on MPI_Comm
3966 
3967    Input Parameters:
3968 +  comm - MPI communicator
3969 .  m - number of local rows (Cannot be PETSC_DECIDE)
3970 .  n - This value should be the same as the local size used in creating the
3971        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3972        calculated if N is given) For square matrices n is almost always m.
3973 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3974 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3975 .   i - row indices
3976 .   j - column indices
3977 -   a - matrix values
3978 
3979    Output Parameter:
3980 .   mat - the matrix
3981 
3982    Level: intermediate
3983 
3984    Notes:
3985        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3986      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3987      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3988 
3989        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3990 
3991        The format which is used for the sparse matrix input is equivalent to a
3992     row-major ordering, i.e., for the following matrix the input data expected is
3993     as shown:
3994 
3995         1 0 0
3996         2 0 3     P0
3997        -------
3998         4 5 6     P1
3999 
4000      Process0 [P0]: rows_owned=[0,1]
4001         i =  {0,1,3}  [size = nrow+1  = 2+1]
4002         j =  {0,0,2}  [size = nz = 3]
4003         v =  {1,2,3}  [size = nz = 3]
4004 
4005      Process1 [P1]: rows_owned=[2]
4006         i =  {0,3}    [size = nrow+1  = 1+1]
4007         j =  {0,1,2}  [size = nz = 3]
4008         v =  {4,5,6}  [size = nz = 3]
4009 
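     As an illustrative sketch (the call each process of the example above might make;
     the names comm and A are hypothetical):
.vb
     /* on process 0 */
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};
     PetscScalar v0[] = {1,2,3};
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i0,j0,v0,&A);CHKERRQ(ierr);

     /* on process 1 */
     PetscInt    i1[] = {0,3}, j1[] = {0,1,2};
     PetscScalar v1[] = {4,5,6};
     ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i1,j1,v1,&A);CHKERRQ(ierr);
.ve
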
4010 .keywords: matrix, aij, compressed row, sparse, parallel
4011 
4012 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4013           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4014 @*/
4015 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4016 {
4017   PetscErrorCode ierr;
4018 
4019   PetscFunctionBegin;
4020   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4021   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4022   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4023   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4024   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4025   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4026   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4027   PetscFunctionReturn(0);
4028 }
4029 
4030 #undef __FUNCT__
4031 #define __FUNCT__ "MatCreateAIJ"
4032 /*@C
4033    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4034    (the default parallel PETSc format).  For good matrix assembly performance
4035    the user should preallocate the matrix storage by setting the parameters
4036    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4037    performance can be increased by more than a factor of 50.
4038 
4039    Collective on MPI_Comm
4040 
4041    Input Parameters:
4042 +  comm - MPI communicator
4043 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4044            This value should be the same as the local size used in creating the
4045            y vector for the matrix-vector product y = Ax.
4046 .  n - number of local columns; this value should be the same as the local size used in
4047        creating the x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4048        calculated if N is given). For square matrices n is almost always m.
4049 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4050 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4051 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4052            (same value is used for all local rows)
4053 .  d_nnz - array containing the number of nonzeros in the various rows of the
4054            DIAGONAL portion of the local submatrix (possibly different for each row)
4055            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4056            The size of this array is equal to the number of local rows, i.e 'm'.
4057 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4058            submatrix (same value is used for all local rows).
4059 -  o_nnz - array containing the number of nonzeros in the various rows of the
4060            OFF-DIAGONAL portion of the local submatrix (possibly different for
4061            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4062            structure. The size of this array is equal to the number
4063            of local rows, i.e 'm'.
4064 
4065    Output Parameter:
4066 .  A - the matrix
4067 
4068    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4069    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4070    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4071 
4072    Notes:
4073    If the *_nnz parameter is given then the *_nz parameter is ignored
4074 
4075    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4076    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4077    storage requirements for this matrix.
4078 
4079    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4080    processor then it must be used on all processors that share the object for
4081    that argument.
4082 
4083    The user MUST specify either the local or global matrix dimensions
4084    (possibly both).
4085 
4086    The parallel matrix is partitioned across processors such that the
4087    first m0 rows belong to process 0, the next m1 rows belong to
4088    process 1, the next m2 rows belong to process 2, etc., where
4089    m0,m1,m2,... are the values of the input parameter 'm' on each process, i.e. each
4090    processor stores values corresponding to an [m x N] submatrix.
4091 
4092    The columns are logically partitioned with the first n0 columns belonging
4093    to the 0th partition, the next n1 columns belonging to the next
4094    partition, etc., where n0,n1,n2,... are the values of the input parameter 'n' on each process.
4095 
4096    The DIAGONAL portion of the local submatrix on any given processor
4097    is the submatrix corresponding to the rows and columns m,n
4098    owned by the given processor, i.e. the diagonal submatrix on
4099    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4100    etc. The remaining [m x (N-n)] portion of the local submatrix
4101    constitutes the OFF-DIAGONAL portion. The example below
4102    illustrates this concept.
4103 
4104    For a square global matrix we define each processor's diagonal portion
4105    to be its local rows and the corresponding columns (a square submatrix);
4106    each processor's off-diagonal portion encompasses the remainder of the
4107    local matrix (a rectangular submatrix).
4108 
4109    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
4110 
4111    When calling this routine with a single process communicator, a matrix of
4112    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4113    type of communicator, use the construction mechanism:
4114      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4115 
4116    By default, this format uses inodes (identical nodes) when possible.
4117    We search for consecutive rows with the same nonzero structure, thereby
4118    reusing matrix information to achieve increased efficiency.
4119 
4120    Options Database Keys:
4121 +  -mat_no_inode  - Do not use inodes
4122 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4123 -  -mat_aij_oneindex - Internally use indexing starting at 1
4124         rather than 0.  Note that when calling MatSetValues(),
4125         the user still MUST index entries starting at 0!
4126 
4127 
4128    Example usage:
4129 
4130    Consider the following 8x8 matrix with 34 nonzero values that is
4131    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4132    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4133    as follows:
4134 
4135 .vb
4136             1  2  0  |  0  3  0  |  0  4
4137     Proc0   0  5  6  |  7  0  0  |  8  0
4138             9  0 10  | 11  0  0  | 12  0
4139     -------------------------------------
4140            13  0 14  | 15 16 17  |  0  0
4141     Proc1   0 18  0  | 19 20 21  |  0  0
4142             0  0  0  | 22 23  0  | 24  0
4143     -------------------------------------
4144     Proc2  25 26 27  |  0  0 28  | 29  0
4145            30  0  0  | 31 32 33  |  0 34
4146 .ve
4147 
4148    This can be represented as a collection of submatrices as:
4149 
4150 .vb
4151       A B C
4152       D E F
4153       G H I
4154 .ve
4155 
4156    Here the submatrices A,B,C are owned by proc0, D,E,F are
4157    owned by proc1, and G,H,I are owned by proc2.
4158 
4159    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4160    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4161    The 'M','N' parameters are 8,8, and have the same values on all procs.
4162 
4163    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4164    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4165    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4166    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4167    part as separate SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4168    matrix, and [DF] as another SeqAIJ matrix.
4169 
4170    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4171    allocated for every row of the local DIAGONAL submatrix, and o_nz
4172    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4173    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4174    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4175    In this case, the values of d_nz,o_nz are:
4176 .vb
4177      proc0 : d_nz = 2, o_nz = 2
4178      proc1 : d_nz = 3, o_nz = 2
4179      proc2 : d_nz = 1, o_nz = 4
4180 .ve
4181    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4182    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4183    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4184    34 values.
4185 
4186    When the d_nnz, o_nnz parameters are specified, the storage is specified
4187    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4188    In the above case the values for d_nnz,o_nnz are:
4189 .vb
4190      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4191      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4192      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4193 .ve
4194    Here the space allocated is the sum of all the above values, i.e. 34, and
4195    hence the preallocation is exact.
4196 
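   As an illustrative sketch (the call proc0 of the example above might make; the
   name A is hypothetical, and PETSC_COMM_WORLD is assumed to span the 3 processes):
.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
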
4197    Level: intermediate
4198 
4199 .keywords: matrix, aij, compressed row, sparse, parallel
4200 
4201 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4202           MPIAIJ, MatCreateMPIAIJWithArrays()
4203 @*/
4204 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4205 {
4206   PetscErrorCode ierr;
4207   PetscMPIInt    size;
4208 
4209   PetscFunctionBegin;
4210   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4211   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4212   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4213   if (size > 1) {
4214     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4215     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4216   } else {
4217     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4218     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4219   }
4220   PetscFunctionReturn(0);
4221 }
4222 
4223 #undef __FUNCT__
4224 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4225 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4226 {
4227   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4228 
4229   PetscFunctionBegin;
4230   *Ad     = a->A;
4231   *Ao     = a->B;
4232   *colmap = a->garray;
4233   PetscFunctionReturn(0);
4234 }
4235 
4236 #undef __FUNCT__
4237 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4238 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4239 {
4240   PetscErrorCode ierr;
4241   PetscInt       i;
4242   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4243 
4244   PetscFunctionBegin;
4245   if (coloring->ctype == IS_COLORING_GLOBAL) {
4246     ISColoringValue *allcolors,*colors;
4247     ISColoring      ocoloring;
4248 
4249     /* set coloring for diagonal portion */
4250     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4251 
4252     /* set coloring for off-diagonal portion */
4253     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4254     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4255     for (i=0; i<a->B->cmap->n; i++) {
4256       colors[i] = allcolors[a->garray[i]];
4257     }
4258     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4259     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4260     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4261     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4262   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4263     ISColoringValue *colors;
4264     PetscInt        *larray;
4265     ISColoring      ocoloring;
4266 
4267     /* set coloring for diagonal portion */
4268     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4269     for (i=0; i<a->A->cmap->n; i++) {
4270       larray[i] = i + A->cmap->rstart;
4271     }
4272     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4273     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4274     for (i=0; i<a->A->cmap->n; i++) {
4275       colors[i] = coloring->colors[larray[i]];
4276     }
4277     ierr = PetscFree(larray);CHKERRQ(ierr);
4278     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4279     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4280     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4281 
4282     /* set coloring for off-diagonal portion */
4283     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4284     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4285     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4286     for (i=0; i<a->B->cmap->n; i++) {
4287       colors[i] = coloring->colors[larray[i]];
4288     }
4289     ierr = PetscFree(larray);CHKERRQ(ierr);
4290     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4291     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4292     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4293   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4294 
4295   PetscFunctionReturn(0);
4296 }
4297 
4298 #if defined(PETSC_HAVE_ADIC)
4299 #undef __FUNCT__
4300 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4301 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4302 {
4303   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4304   PetscErrorCode ierr;
4305 
4306   PetscFunctionBegin;
4307   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4308   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4309   PetscFunctionReturn(0);
4310 }
4311 #endif
4312 
4313 #undef __FUNCT__
4314 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4315 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4316 {
4317   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4318   PetscErrorCode ierr;
4319 
4320   PetscFunctionBegin;
4321   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4322   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4323   PetscFunctionReturn(0);
4324 }
4325 
4326 #undef __FUNCT__
4327 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4328 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4329 {
4330   PetscErrorCode ierr;
4331   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4332   PetscInt       *indx;
4333 
4334   PetscFunctionBegin;
4335   /* This routine will ONLY return MPIAIJ type matrix */
4336   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4337   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4338   if (n == PETSC_DECIDE){
4339     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4340   }
4341   /* Check sum(n) = N */
4342   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4343   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4344 
4345   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4346   rstart -= m;
4347 
4348   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4349   for (i=0;i<m;i++) {
4350     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4351     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4352     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4353   }
4354 
4355   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4356   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4357   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4358   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4359   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4360   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4361   PetscFunctionReturn(0);
4362 }
4363 
4364 #undef __FUNCT__
4365 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4366 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4367 {
4368   PetscErrorCode ierr;
4369   PetscInt       m,N,i,rstart,nnz,Ii;
4370   PetscInt       *indx;
4371   PetscScalar    *values;
4372 
4373   PetscFunctionBegin;
4374   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4375   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4376   for (i=0;i<m;i++) {
4377     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4378     Ii    = i + rstart;
4379     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4380     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4381   }
4382   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4383   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4384   PetscFunctionReturn(0);
4385 }
4386 
4387 #undef __FUNCT__
4388 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4389 /*@
4390       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4391                  matrices from each processor
4392 
4393     Collective on MPI_Comm
4394 
4395    Input Parameters:
4396 +    comm - the communicator the parallel matrix will live on
4397 .    inmat - the input sequential matrix (one per process)
4398 .    n - number of local columns (or PETSC_DECIDE)
4399 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4400 
4401    Output Parameter:
4402 .    outmat - the parallel matrix generated
4403 
4404     Level: advanced
4405 
4406    Notes: The number of columns of the sequential matrix on EACH process MUST be the same.
4407 
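   As an illustrative sketch (seqA is a hypothetical assembled SeqAIJ matrix on each
   process, all with the same number of columns; parA is the resulting parallel matrix):
.vb
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parA);CHKERRQ(ierr);
     /* ... change the numerical values (but not the nonzero pattern) of seqA ... */
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_REUSE_MATRIX,&parA);CHKERRQ(ierr);
.ve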
4408 @*/
4409 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4410 {
4411   PetscErrorCode ierr;
4412 
4413   PetscFunctionBegin;
4414   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4415   if (scall == MAT_INITIAL_MATRIX){
4416     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4417   }
4418   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4419   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4420   PetscFunctionReturn(0);
4421 }
4422 
4423 #undef __FUNCT__
4424 #define __FUNCT__ "MatFileSplit"
4425 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4426 {
4427   PetscErrorCode    ierr;
4428   PetscMPIInt       rank;
4429   PetscInt          m,N,i,rstart,nnz;
4430   size_t            len;
4431   const PetscInt    *indx;
4432   PetscViewer       out;
4433   char              *name;
4434   Mat               B;
4435   const PetscScalar *values;
4436 
4437   PetscFunctionBegin;
4438   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4439   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4440   /* Should this be the type of the diagonal block of A? */
4441   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4442   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4443   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4444   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4445   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4446   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4447   for (i=0;i<m;i++) {
4448     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4449     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4450     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4451   }
4452   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4453   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4454 
4455   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4456   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4457   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4458   sprintf(name,"%s.%d",outfile,rank);
4459   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4460   ierr = PetscFree(name);CHKERRQ(ierr);
4461   ierr = MatView(B,out);CHKERRQ(ierr);
4462   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4463   ierr = MatDestroy(&B);CHKERRQ(ierr);
4464   PetscFunctionReturn(0);
4465 }
4466 
4467 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4468 #undef __FUNCT__
4469 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4470 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4471 {
4472   PetscErrorCode       ierr;
4473   Mat_Merge_SeqsToMPI  *merge;
4474   PetscContainer       container;
4475 
4476   PetscFunctionBegin;
4477   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4478   if (container) {
4479     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4480     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4481     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4482     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4483     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4484     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4485     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4486     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4487     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4488     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4489     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4490     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4491     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4492     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4493     ierr = PetscFree(merge);CHKERRQ(ierr);
4494     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4495   }
4496   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4497   PetscFunctionReturn(0);
4498 }
4499 
4500 #include <../src/mat/utils/freespace.h>
4501 #include <petscbt.h>
4502 
4503 #undef __FUNCT__
4504 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4505 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4506 {
4507   PetscErrorCode       ierr;
4508   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4509   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4510   PetscMPIInt          size,rank,taga,*len_s;
4511   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4512   PetscInt             proc,m;
4513   PetscInt             **buf_ri,**buf_rj;
4514   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4515   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4516   MPI_Request          *s_waits,*r_waits;
4517   MPI_Status           *status;
4518   MatScalar            *aa=a->a;
4519   MatScalar            **abuf_r,*ba_i;
4520   Mat_Merge_SeqsToMPI  *merge;
4521   PetscContainer       container;
4522 
4523   PetscFunctionBegin;
4524   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4525 
4526   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4527   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4528 
4529   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4530   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4531 
4532   bi     = merge->bi;
4533   bj     = merge->bj;
4534   buf_ri = merge->buf_ri;
4535   buf_rj = merge->buf_rj;
4536 
4537   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4538   owners = merge->rowmap->range;
4539   len_s  = merge->len_s;
4540 
4541   /* send and recv matrix values */
4542   /*-----------------------------*/
4543   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4544   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4545 
4546   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4547   for (proc=0,k=0; proc<size; proc++){
4548     if (!len_s[proc]) continue;
4549     i = owners[proc];
4550     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4551     k++;
4552   }
4553 
4554   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4555   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4556   ierr = PetscFree(status);CHKERRQ(ierr);
4557 
4558   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4559   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4560 
4561   /* insert mat values of mpimat */
4562   /*----------------------------*/
4563   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4564   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4565 
4566   for (k=0; k<merge->nrecv; k++){
4567     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4568     nrows = *(buf_ri_k[k]);
4569     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4570     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4571   }
4572 
4573   /* set values of ba */
4574   m = merge->rowmap->n;
4575   for (i=0; i<m; i++) {
4576     arow = owners[rank] + i;
4577     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4578     bnzi = bi[i+1] - bi[i];
4579     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4580 
4581     /* add local non-zero vals of this proc's seqmat into ba */
4582     anzi = ai[arow+1] - ai[arow];
4583     aj   = a->j + ai[arow];
4584     aa   = a->a + ai[arow];
4585     nextaj = 0;
4586     for (j=0; nextaj<anzi; j++){
4587       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4588         ba_i[j] += aa[nextaj++];
4589       }
4590     }
4591 
4592     /* add received vals into ba */
4593     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4594       /* i-th row */
4595       if (i == *nextrow[k]) {
4596         anzi = *(nextai[k]+1) - *nextai[k];
4597         aj   = buf_rj[k] + *(nextai[k]);
4598         aa   = abuf_r[k] + *(nextai[k]);
4599         nextaj = 0;
4600         for (j=0; nextaj<anzi; j++){
4601           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4602             ba_i[j] += aa[nextaj++];
4603           }
4604         }
4605         nextrow[k]++; nextai[k]++;
4606       }
4607     }
4608     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4609   }
4610   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4611   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4612 
4613   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4614   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4615   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4616   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4617   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4618   PetscFunctionReturn(0);
4619 }
4620 
4621 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4622 
4623 #undef __FUNCT__
4624 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4625 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4626 {
4627   PetscErrorCode       ierr;
4628   Mat                  B_mpi;
4629   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4630   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4631   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4632   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4633   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4634   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4635   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4636   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4637   MPI_Status           *status;
4638   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4639   PetscBT              lnkbt;
4640   Mat_Merge_SeqsToMPI  *merge;
4641   PetscContainer       container;
4642 
4643   PetscFunctionBegin;
4644   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4645 
4646   /* make sure it is a PETSc comm */
4647   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4648   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4649   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4650 
4651   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4652   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4653 
4654   /* determine row ownership */
4655   /*---------------------------------------------------------*/
4656   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4657   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4658   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4659   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4660   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4661   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4662   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4663 
4664   m      = merge->rowmap->n;
4665   M      = merge->rowmap->N;
4666   owners = merge->rowmap->range;
4667 
4668   /* determine the number of messages to send, their lengths */
4669   /*---------------------------------------------------------*/
4670   len_s  = merge->len_s;
4671 
4672   len = 0;  /* length of buf_si[] */
4673   merge->nsend = 0;
4674   for (proc=0; proc<size; proc++){
4675     len_si[proc] = 0;
4676     if (proc == rank){
4677       len_s[proc] = 0;
4678     } else {
4679       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4680       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4681     }
4682     if (len_s[proc]) {
4683       merge->nsend++;
4684       nrows = 0;
4685       for (i=owners[proc]; i<owners[proc+1]; i++){
4686         if (ai[i+1] > ai[i]) nrows++;
4687       }
4688       len_si[proc] = 2*(nrows+1);
4689       len += len_si[proc];
4690     }
4691   }
4692 
4693   /* determine the number and length of messages to receive for ij-structure */
4694   /*-------------------------------------------------------------------------*/
4695   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4696   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4697 
4698   /* post the Irecv of j-structure */
4699   /*-------------------------------*/
4700   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4701   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4702 
4703   /* post the Isend of j-structure */
4704   /*--------------------------------*/
4705   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4706 
4707   for (proc=0, k=0; proc<size; proc++){
4708     if (!len_s[proc]) continue;
4709     i = owners[proc];
4710     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4711     k++;
4712   }
4713 
4714   /* receives and sends of j-structure are complete */
4715   /*------------------------------------------------*/
4716   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4717   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4718 
4719   /* send and recv i-structure */
4720   /*---------------------------*/
4721   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4722   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4723 
4724   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4725   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4726   for (proc=0,k=0; proc<size; proc++){
4727     if (!len_s[proc]) continue;
4728     /* form outgoing message for i-structure:
4729          buf_si[0]:                 nrows to be sent
4730                [1:nrows]:           row index (global)
4731                [nrows+1:2*nrows+1]: i-structure index
4732     */
4733     /*-------------------------------------------*/
4734     nrows = len_si[proc]/2 - 1;
4735     buf_si_i    = buf_si + nrows+1;
4736     buf_si[0]   = nrows;
4737     buf_si_i[0] = 0;
4738     nrows = 0;
4739     for (i=owners[proc]; i<owners[proc+1]; i++){
4740       anzi = ai[i+1] - ai[i];
4741       if (anzi) {
4742         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4743         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4744         nrows++;
4745       }
4746     }
4747     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4748     k++;
4749     buf_si += len_si[proc];
4750   }
4751 
4752   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4753   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4754 
4755   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4756   for (i=0; i<merge->nrecv; i++){
4757     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4758   }
4759 
4760   ierr = PetscFree(len_si);CHKERRQ(ierr);
4761   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4762   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4763   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4764   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4765   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4766   ierr = PetscFree(status);CHKERRQ(ierr);
4767 
4768   /* compute a local seq matrix in each processor */
4769   /*----------------------------------------------*/
4770   /* allocate bi array and free space for accumulating nonzero column info */
4771   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4772   bi[0] = 0;
4773 
4774   /* create and initialize a linked list */
4775   nlnk = N+1;
4776   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4777 
4778   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4779   len  = ai[owners[rank+1]] - ai[owners[rank]];
4781   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4782   current_space = free_space;
4783 
4784   /* determine symbolic info for each local row */
4785   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4786 
4787   for (k=0; k<merge->nrecv; k++){
4788     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4789     nrows = *buf_ri_k[k];
4790     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4791     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4792   }
4793 
4794   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4795   len = 0;
4796   for (i=0;i<m;i++) {
4797     bnzi   = 0;
4798     /* add local non-zero cols of this proc's seqmat into lnk */
4799     arow   = owners[rank] + i;
4800     anzi   = ai[arow+1] - ai[arow];
4801     aj     = a->j + ai[arow];
4802     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4803     bnzi += nlnk;
4804     /* add received col data into lnk */
4805     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4806       if (i == *nextrow[k]) { /* i-th row */
4807         anzi = *(nextai[k]+1) - *nextai[k];
4808         aj   = buf_rj[k] + *nextai[k];
4809         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4810         bnzi += nlnk;
4811         nextrow[k]++; nextai[k]++;
4812       }
4813     }
4814     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4815 
4816     /* if free space is not available, make more free space */
4817     if (current_space->local_remaining<bnzi) {
4818       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4819       nspacedouble++;
4820     }
4821     /* copy data into free space, then initialize lnk */
4822     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4823     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4824 
4825     current_space->array           += bnzi;
4826     current_space->local_used      += bnzi;
4827     current_space->local_remaining -= bnzi;
4828 
4829     bi[i+1] = bi[i] + bnzi;
4830   }
4831 
4832   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4833 
4834   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4835   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4836   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4837 
4838   /* create symbolic parallel matrix B_mpi */
4839   /*---------------------------------------*/
4840   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4841   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4842   if (n==PETSC_DECIDE) {
4843     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4844   } else {
4845     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4846   }
4847   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4848   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4849   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4850   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4851   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4852 
4853   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4854   B_mpi->assembled     = PETSC_FALSE;
4855   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4856   merge->bi            = bi;
4857   merge->bj            = bj;
4858   merge->buf_ri        = buf_ri;
4859   merge->buf_rj        = buf_rj;
4860   merge->coi           = PETSC_NULL;
4861   merge->coj           = PETSC_NULL;
4862   merge->owners_co     = PETSC_NULL;
4863 
4864   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4865 
4866   /* attach the supporting struct to B_mpi for reuse */
4867   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4868   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4869   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4870   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4871   *mpimat = B_mpi;
4872 
4873   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4874   PetscFunctionReturn(0);
4875 }
4876 
4877 #undef __FUNCT__
4878 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4879 /*@C
4880       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4881                  matrices from each processor
4882 
4883     Collective on MPI_Comm
4884 
4885    Input Parameters:
4886 +    comm - the communicator the parallel matrix will live on
4887 .    seqmat - the input sequential matrix (one per process)
4888 .    m - number of local rows (or PETSC_DECIDE)
4889 .    n - number of local columns (or PETSC_DECIDE)
4890 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4891 
4892    Output Parameter:
4893 .    mpimat - the parallel matrix generated
4894 
4895     Level: advanced
4896 
4897    Notes:
4898      The dimensions of the sequential matrix in each processor MUST be the same.
4899      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4900      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
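
   As an illustrative sketch (seqA is a hypothetical SeqAIJ matrix with the same dimensions
   on every process; parA is the resulting parallel sum):
.vb
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parA);CHKERRQ(ierr);
     /* ... change the numerical values (but not the nonzero pattern) of seqA ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&parA);CHKERRQ(ierr);
.ve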
4901 @*/
4902 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4903 {
4904   PetscErrorCode   ierr;
4905   PetscMPIInt     size;
4906 
4907   PetscFunctionBegin;
4908   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4909   if (size == 1){
4910      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4911      if (scall == MAT_INITIAL_MATRIX){
4912        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4913      } else {
4914        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4915      }
4916      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4917      PetscFunctionReturn(0);
4918   }
4919   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4920   if (scall == MAT_INITIAL_MATRIX){
4921     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4922   }
4923   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4924   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4925   PetscFunctionReturn(0);
4926 }
4927 
4928 #undef __FUNCT__
4929 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4930 /*@
4931      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4932           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4933           with MatGetSize().
4934 
4935     Not Collective
4936 
4937    Input Parameters:
4938 +    A - the matrix
4939 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4940 
4941    Output Parameter:
4942 .    A_loc - the local sequential matrix generated
4943 
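   As an illustrative sketch (A is an assumed assembled MPIAIJ matrix; with MAT_INITIAL_MATRIX
   the returned A_loc is owned by the caller and is eventually destroyed with MatDestroy()):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc; after the values of A change, refresh it with ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
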
4944     Level: developer
4945 
4946 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4947 
4948 @*/
4949 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4950 {
4951   PetscErrorCode  ierr;
4952   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4953   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4954   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4955   MatScalar       *aa=a->a,*ba=b->a,*cam;
4956   PetscScalar     *ca;
4957   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4958   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4959   PetscBool       match;
4960 
4961   PetscFunctionBegin;
4962   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4963   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4964   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4965   if (scall == MAT_INITIAL_MATRIX){
4966     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4967     ci[0] = 0;
4968     for (i=0; i<am; i++){
4969       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4970     }
4971     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4972     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4973     k = 0;
4974     for (i=0; i<am; i++) {
4975       ncols_o = bi[i+1] - bi[i];
4976       ncols_d = ai[i+1] - ai[i];
4977       /* off-diagonal portion of A */
4978       for (jo=0; jo<ncols_o; jo++) {
4979         col = cmap[*bj];
4980         if (col >= cstart) break;
4981         cj[k]   = col; bj++;
4982         ca[k++] = *ba++;
4983       }
4984       /* diagonal portion of A */
4985       for (j=0; j<ncols_d; j++) {
4986         cj[k]   = cstart + *aj++;
4987         ca[k++] = *aa++;
4988       }
4989       /* off-diagonal portion of A */
4990       for (j=jo; j<ncols_o; j++) {
4991         cj[k]   = cmap[*bj++];
4992         ca[k++] = *ba++;
4993       }
4994     }
4995     /* put together the new matrix */
4996     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4997     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4998     /* Since these are PETSc arrays, change flags to free them as necessary. */
4999     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5000     mat->free_a  = PETSC_TRUE;
5001     mat->free_ij = PETSC_TRUE;
5002     mat->nonew   = 0;
5003   } else if (scall == MAT_REUSE_MATRIX){
5004     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5005     ci = mat->i; cj = mat->j; cam = mat->a;
5006     for (i=0; i<am; i++) {
5007       /* off-diagonal portion of A */
5008       ncols_o = bi[i+1] - bi[i];
5009       for (jo=0; jo<ncols_o; jo++) {
5010         col = cmap[*bj];
5011         if (col >= cstart) break;
5012         *cam++ = *ba++; bj++;
5013       }
5014       /* diagonal portion of A */
5015       ncols_d = ai[i+1] - ai[i];
5016       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5017       /* off-diagonal portion of A */
5018       for (j=jo; j<ncols_o; j++) {
5019         *cam++ = *ba++; bj++;
5020       }
5021     }
5022   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5023   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5024   PetscFunctionReturn(0);
5025 }
5026 
5027 #undef __FUNCT__
5028 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5029 /*@C
5030      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5031 
5032     Not Collective
5033 
5034    Input Parameters:
5035 +    A - the matrix
5036 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5037 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5038 
5039    Output Parameter:
5040 .    A_loc - the local sequential matrix generated
5041 
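   As an illustrative sketch (A is an assumed assembled MPIAIJ matrix; passing PETSC_NULL for
   row and col extracts all local rows and all nonzero columns):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
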
5042     Level: developer
5043 
5044 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5045 
5046 @*/
5047 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5048 {
5049   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5050   PetscErrorCode    ierr;
5051   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5052   IS                isrowa,iscola;
5053   Mat               *aloc;
5054   PetscBool       match;
5055 
5056   PetscFunctionBegin;
5057   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5058   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5059   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5060   if (!row){
5061     start = A->rmap->rstart; end = A->rmap->rend;
5062     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5063   } else {
5064     isrowa = *row;
5065   }
5066   if (!col){
5067     start = A->cmap->rstart;
5068     cmap  = a->garray;
5069     nzA   = a->A->cmap->n;
5070     nzB   = a->B->cmap->n;
5071     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5072     ncols = 0;
5073     for (i=0; i<nzB; i++) {
5074       if (cmap[i] < start) idx[ncols++] = cmap[i];
5075       else break;
5076     }
5077     imark = i;
5078     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5079     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5080     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5081   } else {
5082     iscola = *col;
5083   }
5084   if (scall != MAT_INITIAL_MATRIX){
5085     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5086     aloc[0] = *A_loc;
5087   }
5088   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5089   *A_loc = aloc[0];
5090   ierr = PetscFree(aloc);CHKERRQ(ierr);
5091   if (!row){
5092     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5093   }
5094   if (!col){
5095     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5096   }
5097   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5098   PetscFunctionReturn(0);
5099 }
5100 
5101 #undef __FUNCT__
5102 #define __FUNCT__ "MatGetBrowsOfAcols"
5103 /*@C
5104     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5105 
5106     Collective on Mat
5107 
5108    Input Parameters:
5109 +    A,B - the matrices in mpiaij format
5110 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5111 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5112 
5113    Output Parameter:
5114 +    rowb, colb - index sets of rows and columns of B to extract
5115 -    B_seq - the sequential matrix generated
5116 
5117     Level: developer
5118 
5119 @*/
5120 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5121 {
5122   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5123   PetscErrorCode    ierr;
5124   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5125   IS                isrowb,iscolb;
5126   Mat               *bseq=PETSC_NULL;
5127 
5128   PetscFunctionBegin;
5129   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5130     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5131   }
5132   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5133 
5134   if (scall == MAT_INITIAL_MATRIX){
5135     start = A->cmap->rstart;
5136     cmap  = a->garray;
5137     nzA   = a->A->cmap->n;
5138     nzB   = a->B->cmap->n;
5139     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5140     ncols = 0;
5141     for (i=0; i<nzB; i++) {  /* row < local row index */
5142       if (cmap[i] < start) idx[ncols++] = cmap[i];
5143       else break;
5144     }
5145     imark = i;
5146     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5147     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5148     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5149     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5150   } else {
5151     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5152     isrowb = *rowb; iscolb = *colb;
5153     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5154     bseq[0] = *B_seq;
5155   }
5156   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5157   *B_seq = bseq[0];
5158   ierr = PetscFree(bseq);CHKERRQ(ierr);
5159   if (!rowb){
5160     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5161   } else {
5162     *rowb = isrowb;
5163   }
5164   if (!colb){
5165     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5166   } else {
5167     *colb = iscolb;
5168   }
5169   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5170   PetscFunctionReturn(0);
5171 }
5172 
5173 #undef __FUNCT__
5174 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5175 /*
5176     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5177     of the OFF-DIAGONAL portion of the local part of A
5178 
5179     Collective on Mat
5180 
5181    Input Parameters:
5182 +    A,B - the matrices in mpiaij format
5183 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5184 
5185    Output Parameter:
5186 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5187 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5188 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5189 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5190 
5191     Level: developer
5192 
5193 */
5194 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5195 {
5196   VecScatter_MPI_General *gen_to,*gen_from;
5197   PetscErrorCode         ierr;
5198   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5199   Mat_SeqAIJ             *b_oth;
5200   VecScatter             ctx=a->Mvctx;
5201   MPI_Comm               comm=((PetscObject)ctx)->comm;
5202   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5203   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5204   PetscScalar            *rvalues,*svalues;
5205   MatScalar              *b_otha,*bufa,*bufA;
5206   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5207   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5208   MPI_Status             *sstatus,rstatus;
5209   PetscMPIInt            jj;
5210   PetscInt               *cols,sbs,rbs;
5211   PetscScalar            *vals;
5212 
5213   PetscFunctionBegin;
5214   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5215     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5216   }
5217   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5218   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5219 
5220   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5221   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5222   rvalues  = gen_from->values; /* holds the length of receiving row */
5223   svalues  = gen_to->values;   /* holds the length of sending row */
5224   nrecvs   = gen_from->n;
5225   nsends   = gen_to->n;
5226 
5227   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5228   srow     = gen_to->indices;   /* local row index to be sent */
5229   sstarts  = gen_to->starts;
5230   sprocs   = gen_to->procs;
5231   sstatus  = gen_to->sstatus;
5232   sbs      = gen_to->bs;
5233   rstarts  = gen_from->starts;
5234   rprocs   = gen_from->procs;
5235   rbs      = gen_from->bs;
5236 
5237   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5238   if (scall == MAT_INITIAL_MATRIX){
5239     /* i-array */
5240     /*---------*/
5241     /*  post receives */
5242     for (i=0; i<nrecvs; i++){
5243       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5244       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5245       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5246     }
5247 
5248     /* pack the outgoing message */
5249     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5250     sstartsj[0] = 0;  rstartsj[0] = 0;
5251     len = 0; /* total length of j or a array to be sent */
5252     k = 0;
5253     for (i=0; i<nsends; i++){
5254       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5255       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5256       for (j=0; j<nrows; j++) {
5257         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5258         for (l=0; l<sbs; l++){
5259           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5260           rowlen[j*sbs+l] = ncols;
5261           len += ncols;
5262           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5263         }
5264         k++;
5265       }
5266       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5267       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5268     }
5269     /* recvs and sends of i-array are completed */
5270     i = nrecvs;
5271     while (i--) {
5272       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5273     }
5274     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5275 
5276     /* allocate buffers for sending j and a arrays */
5277     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5278     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5279 
5280     /* create i-array of B_oth */
5281     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5282     b_othi[0] = 0;
5283     len = 0; /* total length of j or a array to be received */
5284     k = 0;
5285     for (i=0; i<nrecvs; i++){
5286       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5287       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5288       for (j=0; j<nrows; j++) {
5289         b_othi[k+1] = b_othi[k] + rowlen[j];
5290         len += rowlen[j]; k++;
5291       }
5292       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5293     }
5294 
5295     /* allocate space for j and a arrays of B_oth */
5296     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5297     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5298 
5299     /* j-array */
5300     /*---------*/
5301     /*  post receives of j-array */
5302     for (i=0; i<nrecvs; i++){
5303       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5304       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5305     }
5306 
5307     /* pack the outgoing message j-array */
5308     k = 0;
5309     for (i=0; i<nsends; i++){
5310       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5311       bufJ = bufj+sstartsj[i];
5312       for (j=0; j<nrows; j++) {
5313         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5314         for (ll=0; ll<sbs; ll++){
5315           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5316           for (l=0; l<ncols; l++){
5317             *bufJ++ = cols[l];
5318           }
5319           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5320         }
5321       }
5322       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5323     }
5324 
5325     /* wait until recvs and sends of j-array are completed */
5326     i = nrecvs;
5327     while (i--) {
5328       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5329     }
5330     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5331   } else if (scall == MAT_REUSE_MATRIX){
5332     sstartsj = *startsj_s;
5333     rstartsj = *startsj_r;
5334     bufa     = *bufa_ptr;
5335     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5336     b_otha   = b_oth->a;
5337   } else {
5338     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "MatReuse must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5339   }
5340 
5341   /* a-array */
5342   /*---------*/
5343   /*  post receives of a-array */
5344   for (i=0; i<nrecvs; i++){
5345     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5346     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5347   }
5348 
5349   /* pack the outgoing message a-array */
5350   k = 0;
5351   for (i=0; i<nsends; i++){
5352     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5353     bufA = bufa+sstartsj[i];
5354     for (j=0; j<nrows; j++) {
5355       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5356       for (ll=0; ll<sbs; ll++){
5357         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5358         for (l=0; l<ncols; l++){
5359           *bufA++ = vals[l];
5360         }
5361         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5362       }
5363     }
5364     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5365   }
5366   /* wait until recvs and sends of a-array are completed */
5367   i = nrecvs;
5368   while (i--) {
5369     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5370   }
5371   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5372   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5373 
5374   if (scall == MAT_INITIAL_MATRIX){
5375     /* put together the new matrix */
5376     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5377 
5378     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5379     /* Since these are PETSc arrays, change flags to free them as necessary. */
5380     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5381     b_oth->free_a  = PETSC_TRUE;
5382     b_oth->free_ij = PETSC_TRUE;
5383     b_oth->nonew   = 0;
5384 
5385     ierr = PetscFree(bufj);CHKERRQ(ierr);
5386     if (!startsj_s || !bufa_ptr){
5387       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5388       ierr = PetscFree(bufa);CHKERRQ(ierr);
5389     } else {
5390       *startsj_s = sstartsj;
5391       *startsj_r = rstartsj;
5392       *bufa_ptr  = bufa;
5393     }
5394   }
5395   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5396   PetscFunctionReturn(0);
5397 }
5398 
5399 #undef __FUNCT__
5400 #define __FUNCT__ "MatGetCommunicationStructs"
5401 /*@C
5402   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5403 
5404   Not Collective
5405 
5406   Input Parameter:
5407 . A - The matrix in MPIAIJ format
5408 
5409   Output Parameters:
5410 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5411 . colmap - A map from global column index to local index into lvec
5412 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5413 
5414   Level: developer
5415 
5416 @*/
5417 #if defined (PETSC_USE_CTABLE)
5418 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5419 #else
5420 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5421 #endif
5422 {
5423   Mat_MPIAIJ *a;
5424 
5425   PetscFunctionBegin;
5426   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5427   PetscValidPointer(lvec, 2);
5428   PetscValidPointer(colmap, 3);
5429   PetscValidPointer(multScatter, 4);
5430   a = (Mat_MPIAIJ *) A->data;
5431   if (lvec) *lvec = a->lvec;
5432   if (colmap) *colmap = a->colmap;
5433   if (multScatter) *multScatter = a->Mvctx;
5434   PetscFunctionReturn(0);
5435 }
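/*
   Editor's sketch (not part of the source revision above): minimal use of
   MatGetCommunicationStructs().  The matrix A and the surrounding error handling are
   assumed to exist in the caller, and the example assumes a build without
   PETSC_USE_CTABLE so that colmap is returned as a PetscInt array.

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     // lvec, colmap, and Mvctx point at A's internal data structures; the caller must
     // not destroy or free them, they remain owned by the matrix.
*/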
5436 
5437 EXTERN_C_BEGIN
5438 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5439 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5440 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5441 EXTERN_C_END
5442 
5443 #undef __FUNCT__
5444 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5445 /*
5446     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5447 
5448                n                       p                          p
5449         (              )       (              )         (                  )
5450       m (      A       )  *  n (       B      )   =   m (         C        )
5451         (              )       (              )         (                  )
5452 
5453 */
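/*
   Editor's note on the identity used below: with A of size m x n (MPIDENSE) and B of
   size n x p (MPIAIJ),

       C = A*B = ((A*B)^T)^T = (B^T * A^T)^T

   so the routine forms Bt = B^T and At = A^T, computes Ct = Bt*At with the available
   AIJ-times-dense product, and transposes Ct back into the preallocated dense C.
*/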
5454 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5455 {
5456   PetscErrorCode     ierr;
5457   Mat                At,Bt,Ct;
5458 
5459   PetscFunctionBegin;
5460   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5461   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5462   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5463   ierr = MatDestroy(&At);CHKERRQ(ierr);
5464   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5465   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5466   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5467   PetscFunctionReturn(0);
5468 }
5469 
5470 #undef __FUNCT__
5471 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5472 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5473 {
5474   PetscErrorCode ierr;
5475   PetscInt       m=A->rmap->n,n=B->cmap->n;
5476   Mat            Cmat;
5477 
5478   PetscFunctionBegin;
5479   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d",A->cmap->n,B->rmap->n);
5480   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5481   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5482   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5483   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5484   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5485   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5486   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5487   *C   = Cmat;
5488   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5489   PetscFunctionReturn(0);
5490 }
5491 
5492 /* ----------------------------------------------------------------*/
5493 #undef __FUNCT__
5494 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5495 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5496 {
5497   PetscErrorCode ierr;
5498 
5499   PetscFunctionBegin;
5500   if (scall == MAT_INITIAL_MATRIX){
5501     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5502   }
5503   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5504   PetscFunctionReturn(0);
5505 }
5506 
5507 EXTERN_C_BEGIN
5508 #if defined(PETSC_HAVE_MUMPS)
5509 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5510 #endif
5511 #if defined(PETSC_HAVE_PASTIX)
5512 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5513 #endif
5514 #if defined(PETSC_HAVE_SUPERLU_DIST)
5515 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5516 #endif
5517 #if defined(PETSC_HAVE_SPOOLES)
5518 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5519 #endif
5520 EXTERN_C_END
5521 
5522 /*MC
5523    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5524 
5525    Options Database Keys:
5526 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5527 
5528   Level: beginner
5529 
5530 .seealso: MatCreateAIJ()
5531 M*/
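/*
   Editor's sketch (hypothetical sizes and preallocation counts): the usual way to end
   up with a MATMPIAIJ matrix through the options database.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);                       /* honours -mat_type mpiaij */
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);  /* covers the 1-process case */
     ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() as usual ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/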
5532 
5533 EXTERN_C_BEGIN
5534 #undef __FUNCT__
5535 #define __FUNCT__ "MatCreate_MPIAIJ"
5536 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5537 {
5538   Mat_MPIAIJ     *b;
5539   PetscErrorCode ierr;
5540   PetscMPIInt    size;
5541 
5542   PetscFunctionBegin;
5543   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5544 
5545   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5546   B->data         = (void*)b;
5547   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5548   B->assembled    = PETSC_FALSE;
5549 
5550   B->insertmode   = NOT_SET_VALUES;
5551   b->size         = size;
5552   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5553 
5554   /* build cache for off array entries formed */
5555   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5556   b->donotstash  = PETSC_FALSE;
5557   b->colmap      = 0;
5558   b->garray      = 0;
5559   b->roworiented = PETSC_TRUE;
5560 
5561   /* stuff used for matrix vector multiply */
5562   b->lvec      = PETSC_NULL;
5563   b->Mvctx     = PETSC_NULL;
5564 
5565   /* stuff for MatGetRow() */
5566   b->rowindices   = 0;
5567   b->rowvalues    = 0;
5568   b->getrowactive = PETSC_FALSE;
5569 
5570 #if defined(PETSC_HAVE_SPOOLES)
5571   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5572                                      "MatGetFactor_mpiaij_spooles",
5573                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5574 #endif
5575 #if defined(PETSC_HAVE_MUMPS)
5576   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5577                                      "MatGetFactor_aij_mumps",
5578                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5579 #endif
5580 #if defined(PETSC_HAVE_PASTIX)
5581   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5582 					   "MatGetFactor_mpiaij_pastix",
5583 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5584 #endif
5585 #if defined(PETSC_HAVE_SUPERLU_DIST)
5586   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5587                                      "MatGetFactor_mpiaij_superlu_dist",
5588                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5589 #endif
5590   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5591                                      "MatStoreValues_MPIAIJ",
5592                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5593   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5594                                      "MatRetrieveValues_MPIAIJ",
5595                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5596   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5597 				     "MatGetDiagonalBlock_MPIAIJ",
5598                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5599   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5600 				     "MatIsTranspose_MPIAIJ",
5601 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5602   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5603 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5604 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5605   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5606 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5607 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5608   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5609 				     "MatDiagonalScaleLocal_MPIAIJ",
5610 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5612                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5613                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5614   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5615                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5616                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5617   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5618                                      "MatConvert_MPIAIJ_MPISBAIJ",
5619                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5620   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5621                                      "MatMatMult_MPIDense_MPIAIJ",
5622                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5623   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5624                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5625                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5626   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5627                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5628                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5629   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5630   PetscFunctionReturn(0);
5631 }
5632 EXTERN_C_END
5633 
5634 #undef __FUNCT__
5635 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5636 /*@
5637      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5638          and "off-diagonal" part of the matrix in CSR format.
5639 
5640    Collective on MPI_Comm
5641 
5642    Input Parameters:
5643 +  comm - MPI communicator
5644 .  m - number of local rows (Cannot be PETSC_DECIDE)
5645 .  n - This value should be the same as the local size used in creating the
5646        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5647        it calculated if N is given). For square matrices n is almost always m.
5648 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5649 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5650 .   i - row indices for "diagonal" portion of matrix
5651 .   j - column indices
5652 .   a - matrix values
5653 .   oi - row indices for "off-diagonal" portion of matrix
5654 .   oj - column indices
5655 -   oa - matrix values
5656 
5657    Output Parameter:
5658 .   mat - the matrix
5659 
5660    Level: advanced
5661 
5662    Notes:
5663        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5664        must free the arrays once the matrix has been destroyed and not before.
5665 
5666        The i and j indices are 0-based
5667 
5668        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5669 
5670        This sets local rows and cannot be used to set off-processor values.
5671 
5672        You cannot later use MatSetValues() to change values in this matrix.
5673 
5674 .keywords: matrix, aij, compressed row, sparse, parallel
5675 
5676 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5677           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5678 @*/
5679 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5680 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5681 {
5682   PetscErrorCode ierr;
5683   Mat_MPIAIJ     *maij;
5684 
5685   PetscFunctionBegin;
5686   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5687   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5688   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5689   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5690   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5691   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5692   maij = (Mat_MPIAIJ*) (*mat)->data;
5693   maij->donotstash     = PETSC_TRUE;
5694   (*mat)->preallocated = PETSC_TRUE;
5695 
5696   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5697   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5698 
5699   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5700   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5701 
5702   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5703   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5704   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5705   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5706 
5707   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5708   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5709   PetscFunctionReturn(0);
5710 }
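/*
   Editor's sketch (hypothetical arrays): each process calls the routine with the CSR
   arrays for its own local rows.  As the code above is written, j indexes the columns
   of the local "diagonal" block (0..n-1) while oj uses global column numbering for the
   "off-diagonal" block; see MatCreateAIJ() for how the two blocks are defined.

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
     // only after the destroy may the caller free i, j, a, oi, oj, and oa
*/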
5711 
5712 /*
5713     Special version for direct calls from Fortran
5714 */
5715 #include <petsc-private/fortranimpl.h>
5716 
5717 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5718 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5719 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5720 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5721 #endif
5722 
5723 /* Change these macros so they can be used in a void function */
5724 #undef CHKERRQ
5725 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5726 #undef SETERRQ2
5727 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5728 #undef SETERRQ3
5729 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5730 #undef SETERRQ
5731 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5732 
5733 EXTERN_C_BEGIN
5734 #undef __FUNCT__
5735 #define __FUNCT__ "matsetvaluesmpiaij_"
5736 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5737 {
5738   Mat             mat = *mmat;
5739   PetscInt        m = *mm, n = *mn;
5740   InsertMode      addv = *maddv;
5741   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5742   PetscScalar     value;
5743   PetscErrorCode  ierr;
5744 
5745   MatCheckPreallocated(mat,1);
5746   if (mat->insertmode == NOT_SET_VALUES) {
5747     mat->insertmode = addv;
5748   }
5749 #if defined(PETSC_USE_DEBUG)
5750   else if (mat->insertmode != addv) {
5751     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5752   }
5753 #endif
5754   {
5755   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5756   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5757   PetscBool       roworiented = aij->roworiented;
5758 
5759   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
5760   Mat             A = aij->A;
5761   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5762   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5763   MatScalar       *aa = a->a;
5764   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5765   Mat             B = aij->B;
5766   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5767   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5768   MatScalar       *ba = b->a;
5769 
5770   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5771   PetscInt        nonew = a->nonew;
5772   MatScalar       *ap1,*ap2;
5773 
5774   PetscFunctionBegin;
5775   for (i=0; i<m; i++) {
5776     if (im[i] < 0) continue;
5777 #if defined(PETSC_USE_DEBUG)
5778     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5779 #endif
5780     if (im[i] >= rstart && im[i] < rend) {
5781       row      = im[i] - rstart;
5782       lastcol1 = -1;
5783       rp1      = aj + ai[row];
5784       ap1      = aa + ai[row];
5785       rmax1    = aimax[row];
5786       nrow1    = ailen[row];
5787       low1     = 0;
5788       high1    = nrow1;
5789       lastcol2 = -1;
5790       rp2      = bj + bi[row];
5791       ap2      = ba + bi[row];
5792       rmax2    = bimax[row];
5793       nrow2    = bilen[row];
5794       low2     = 0;
5795       high2    = nrow2;
5796 
5797       for (j=0; j<n; j++) {
5798         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5799         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5800         if (in[j] >= cstart && in[j] < cend){
5801           col = in[j] - cstart;
5802           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5803         } else if (in[j] < 0) continue;
5804 #if defined(PETSC_USE_DEBUG)
5805         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5806 #endif
5807         else {
5808           if (mat->was_assembled) {
5809             if (!aij->colmap) {
5810               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5811             }
5812 #if defined (PETSC_USE_CTABLE)
5813             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5814 	    col--;
5815 #else
5816             col = aij->colmap[in[j]] - 1;
5817 #endif
5818             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5819               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5820               col =  in[j];
5821               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5822               B = aij->B;
5823               b = (Mat_SeqAIJ*)B->data;
5824               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5825               rp2      = bj + bi[row];
5826               ap2      = ba + bi[row];
5827               rmax2    = bimax[row];
5828               nrow2    = bilen[row];
5829               low2     = 0;
5830               high2    = nrow2;
5831               bm       = aij->B->rmap->n;
5832               ba = b->a;
5833             }
5834           } else col = in[j];
5835           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5836         }
5837       }
5838     } else {
5839       if (!aij->donotstash) {
5840         if (roworiented) {
5841           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5842         } else {
5843           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5844         }
5845       }
5846     }
5847   }}
5848   PetscFunctionReturnVoid();
5849 }
5850 EXTERN_C_END
5851 
5852