xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f1e55ea32162cf9a365715cdae2babe91dbef974)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
109 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
110 {
111   PetscErrorCode    ierr;
112   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
113 
114   PetscFunctionBegin;
115   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
116     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
117   } else {
118     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
119   }
120   PetscFunctionReturn(0);
121 }
122 
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
126 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
127 {
128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
129   PetscErrorCode ierr;
130   PetscInt       i,rstart,nrows,*rows;
131 
132   PetscFunctionBegin;
133   *zrows = NULL;
134   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
135   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
136   for (i=0; i<nrows; i++) rows[i] += rstart;
137   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
143 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
144 {
145   PetscErrorCode ierr;
146   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
147   PetscInt       i,n,*garray = aij->garray;
148   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
149   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
150   PetscReal      *work;
151 
152   PetscFunctionBegin;
153   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
154   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
155   if (type == NORM_2) {
156     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
157       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
158     }
159     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
160       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
161     }
162   } else if (type == NORM_1) {
163     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
164       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
165     }
166     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
167       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
168     }
169   } else if (type == NORM_INFINITY) {
170     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
171       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
172     }
173     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
174       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
175     }
176 
177   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
178   if (type == NORM_INFINITY) {
179     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
180   } else {
181     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
182   }
183   ierr = PetscFree(work);CHKERRQ(ierr);
184   if (type == NORM_2) {
185     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
186   }
187   PetscFunctionReturn(0);
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatDistribute_MPIAIJ"
192 /*
193     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
194     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
195 
196     Only for square matrices
197 
198     Used by a preconditioner, hence PETSC_EXTERN
199 */
200 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
201 {
202   PetscMPIInt    rank,size;
203   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
204   PetscErrorCode ierr;
205   Mat            mat;
206   Mat_SeqAIJ     *gmata;
207   PetscMPIInt    tag;
208   MPI_Status     status;
209   PetscBool      aij;
210   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
211 
212   PetscFunctionBegin;
213   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
214   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
215   if (!rank) {
216     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
217     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
218   }
219   if (reuse == MAT_INITIAL_MATRIX) {
220     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
221     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
222     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
223     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
224     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
225     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
226     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
227     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
228     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
229 
230     rowners[0] = 0;
231     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
232     rstart = rowners[rank];
233     rend   = rowners[rank+1];
234     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
235     if (!rank) {
236       gmata = (Mat_SeqAIJ*) gmat->data;
237       /* send row lengths to all processors */
238       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
239       for (i=1; i<size; i++) {
240         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242       /* determine number diagonal and off-diagonal counts */
243       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
244       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
245       jj   = 0;
246       for (i=0; i<m; i++) {
247         for (j=0; j<dlens[i]; j++) {
248           if (gmata->j[jj] < rstart) ld[i]++;
249           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
250           jj++;
251         }
252       }
253       /* send column indices to other processes */
254       for (i=1; i<size; i++) {
255         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
256         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
257         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
258       }
259 
260       /* send numerical values to other processes */
261       for (i=1; i<size; i++) {
262         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
263         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
264       }
265       gmataa = gmata->a;
266       gmataj = gmata->j;
267 
268     } else {
269       /* receive row lengths */
270       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
271       /* receive column indices */
272       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
273       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
274       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
275       /* determine number diagonal and off-diagonal counts */
276       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
277       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
278       jj   = 0;
279       for (i=0; i<m; i++) {
280         for (j=0; j<dlens[i]; j++) {
281           if (gmataj[jj] < rstart) ld[i]++;
282           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
283           jj++;
284         }
285       }
286       /* receive numerical values */
287       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
288       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
289     }
290     /* set preallocation */
291     for (i=0; i<m; i++) {
292       dlens[i] -= olens[i];
293     }
294     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
295     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
296 
297     for (i=0; i<m; i++) {
298       dlens[i] += olens[i];
299     }
300     cnt = 0;
301     for (i=0; i<m; i++) {
302       row  = rstart + i;
303       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
304       cnt += dlens[i];
305     }
306     if (rank) {
307       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
308     }
309     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
310     ierr = PetscFree(rowners);CHKERRQ(ierr);
311 
312     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
313 
314     *inmat = mat;
315   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
316     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
317     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
318     mat  = *inmat;
319     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
320     if (!rank) {
321       /* send numerical values to other processes */
322       gmata  = (Mat_SeqAIJ*) gmat->data;
323       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
324       gmataa = gmata->a;
325       for (i=1; i<size; i++) {
326         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
327         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
328       }
329       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
330     } else {
331       /* receive numerical values from process 0*/
332       nz   = Ad->nz + Ao->nz;
333       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
334       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
335     }
336     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
337     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
338     ad = Ad->a;
339     ao = Ao->a;
340     if (mat->rmap->n) {
341       i  = 0;
342       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
343       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
344     }
345     for (i=1; i<mat->rmap->n; i++) {
346       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
347       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
348     }
349     i--;
350     if (mat->rmap->n) {
351       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
352     }
353     if (rank) {
354       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
355     }
356   }
357   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
358   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
359   PetscFunctionReturn(0);
360 }
361 
362 /*
363   Local utility routine that creates a mapping from the global column
364 number to the local number in the off-diagonal part of the local
365 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
366 a slightly higher hash table cost; without it it is not scalable (each processor
367 has an order N integer array but is fast to acess.
368 */
369 #undef __FUNCT__
370 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
371 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
372 {
373   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
374   PetscErrorCode ierr;
375   PetscInt       n = aij->B->cmap->n,i;
376 
377   PetscFunctionBegin;
378   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
379 #if defined(PETSC_USE_CTABLE)
380   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
381   for (i=0; i<n; i++) {
382     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
383   }
384 #else
385   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
386   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
387   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
388 #endif
389   PetscFunctionReturn(0);
390 }
391 
392 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
393 { \
394     if (col <= lastcol1)  low1 = 0;     \
395     else                 high1 = nrow1; \
396     lastcol1 = col;\
397     while (high1-low1 > 5) { \
398       t = (low1+high1)/2; \
399       if (rp1[t] > col) high1 = t; \
400       else              low1  = t; \
401     } \
402       for (_i=low1; _i<high1; _i++) { \
403         if (rp1[_i] > col) break; \
404         if (rp1[_i] == col) { \
405           if (addv == ADD_VALUES) ap1[_i] += value;   \
406           else                    ap1[_i] = value; \
407           goto a_noinsert; \
408         } \
409       }  \
410       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
411       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
412       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
413       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
414       N = nrow1++ - 1; a->nz++; high1++; \
415       /* shift up all the later entries in this row */ \
416       for (ii=N; ii>=_i; ii--) { \
417         rp1[ii+1] = rp1[ii]; \
418         ap1[ii+1] = ap1[ii]; \
419       } \
420       rp1[_i] = col;  \
421       ap1[_i] = value;  \
422       A->nonzerostate++;\
423       a_noinsert: ; \
424       ailen[row] = nrow1; \
425 }
426 
427 
428 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
429   { \
430     if (col <= lastcol2) low2 = 0;                        \
431     else high2 = nrow2;                                   \
432     lastcol2 = col;                                       \
433     while (high2-low2 > 5) {                              \
434       t = (low2+high2)/2;                                 \
435       if (rp2[t] > col) high2 = t;                        \
436       else             low2  = t;                         \
437     }                                                     \
438     for (_i=low2; _i<high2; _i++) {                       \
439       if (rp2[_i] > col) break;                           \
440       if (rp2[_i] == col) {                               \
441         if (addv == ADD_VALUES) ap2[_i] += value;         \
442         else                    ap2[_i] = value;          \
443         goto b_noinsert;                                  \
444       }                                                   \
445     }                                                     \
446     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
447     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
448     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
449     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
450     N = nrow2++ - 1; b->nz++; high2++;                    \
451     /* shift up all the later entries in this row */      \
452     for (ii=N; ii>=_i; ii--) {                            \
453       rp2[ii+1] = rp2[ii];                                \
454       ap2[ii+1] = ap2[ii];                                \
455     }                                                     \
456     rp2[_i] = col;                                        \
457     ap2[_i] = value;                                      \
458     B->nonzerostate++;                                    \
459     b_noinsert: ;                                         \
460     bilen[row] = nrow2;                                   \
461   }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
465 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
466 {
467   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
468   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
469   PetscErrorCode ierr;
470   PetscInt       l,*garray = mat->garray,diag;
471 
472   PetscFunctionBegin;
473   /* code only works for square matrices A */
474 
475   /* find size of row to the left of the diagonal part */
476   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
477   row  = row - diag;
478   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
479     if (garray[b->j[b->i[row]+l]] > diag) break;
480   }
481   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
482 
483   /* diagonal part */
484   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
485 
486   /* right of diagonal part */
487   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatSetValues_MPIAIJ"
493 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
494 {
495   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
496   PetscScalar    value;
497   PetscErrorCode ierr;
498   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
499   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
500   PetscBool      roworiented = aij->roworiented;
501 
502   /* Some Variables required in the macro */
503   Mat        A                 = aij->A;
504   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
505   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
506   MatScalar  *aa               = a->a;
507   PetscBool  ignorezeroentries = a->ignorezeroentries;
508   Mat        B                 = aij->B;
509   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
510   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
511   MatScalar  *ba               = b->a;
512 
513   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
514   PetscInt  nonew;
515   MatScalar *ap1,*ap2;
516 
517   PetscFunctionBegin;
518   for (i=0; i<m; i++) {
519     if (im[i] < 0) continue;
520 #if defined(PETSC_USE_DEBUG)
521     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
522 #endif
523     if (im[i] >= rstart && im[i] < rend) {
524       row      = im[i] - rstart;
525       lastcol1 = -1;
526       rp1      = aj + ai[row];
527       ap1      = aa + ai[row];
528       rmax1    = aimax[row];
529       nrow1    = ailen[row];
530       low1     = 0;
531       high1    = nrow1;
532       lastcol2 = -1;
533       rp2      = bj + bi[row];
534       ap2      = ba + bi[row];
535       rmax2    = bimax[row];
536       nrow2    = bilen[row];
537       low2     = 0;
538       high2    = nrow2;
539 
540       for (j=0; j<n; j++) {
541         if (roworiented) value = v[i*n+j];
542         else             value = v[i+j*m];
543         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
544         if (in[j] >= cstart && in[j] < cend) {
545           col   = in[j] - cstart;
546           nonew = a->nonew;
547           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
548         } else if (in[j] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
551 #endif
552         else {
553           if (mat->was_assembled) {
554             if (!aij->colmap) {
555               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
556             }
557 #if defined(PETSC_USE_CTABLE)
558             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
559             col--;
560 #else
561             col = aij->colmap[in[j]] - 1;
562 #endif
563             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
564               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
565               col  =  in[j];
566               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
567               B     = aij->B;
568               b     = (Mat_SeqAIJ*)B->data;
569               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
570               rp2   = bj + bi[row];
571               ap2   = ba + bi[row];
572               rmax2 = bimax[row];
573               nrow2 = bilen[row];
574               low2  = 0;
575               high2 = nrow2;
576               bm    = aij->B->rmap->n;
577               ba    = b->a;
578             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
579           } else col = in[j];
580           nonew = b->nonew;
581           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
582         }
583       }
584     } else {
585       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
586       if (!aij->donotstash) {
587         mat->assembled = PETSC_FALSE;
588         if (roworiented) {
589           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
590         } else {
591           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
592         }
593       }
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatGetValues_MPIAIJ"
601 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
602 {
603   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
604   PetscErrorCode ierr;
605   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
606   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
607 
608   PetscFunctionBegin;
609   for (i=0; i<m; i++) {
610     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
611     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
612     if (idxm[i] >= rstart && idxm[i] < rend) {
613       row = idxm[i] - rstart;
614       for (j=0; j<n; j++) {
615         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
616         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
617         if (idxn[j] >= cstart && idxn[j] < cend) {
618           col  = idxn[j] - cstart;
619           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
620         } else {
621           if (!aij->colmap) {
622             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
623           }
624 #if defined(PETSC_USE_CTABLE)
625           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
626           col--;
627 #else
628           col = aij->colmap[idxn[j]] - 1;
629 #endif
630           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
631           else {
632             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
633           }
634         }
635       }
636     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
637   }
638   PetscFunctionReturn(0);
639 }
640 
641 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
645 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
646 {
647   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
648   PetscErrorCode ierr;
649   PetscInt       nstash,reallocs;
650   InsertMode     addv;
651 
652   PetscFunctionBegin;
653   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
654 
655   /* make sure all processors are either in INSERTMODE or ADDMODE */
656   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
657   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
658   mat->insertmode = addv; /* in case this processor had no cache */
659 
660   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
668 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
671   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
672   PetscErrorCode ierr;
673   PetscMPIInt    n;
674   PetscInt       i,j,rstart,ncols,flg;
675   PetscInt       *row,*col;
676   PetscBool      other_disassembled;
677   PetscScalar    *val;
678   InsertMode     addv = mat->insertmode;
679 
680   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
681 
682   PetscFunctionBegin;
683   if (!aij->donotstash && !mat->nooffprocentries) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       if (!flg) break;
687 
688       for (i=0; i<n; ) {
689         /* Now identify the consecutive vals belonging to the same row */
690         for (j=i,rstart=row[j]; j<n; j++) {
691           if (row[j] != rstart) break;
692         }
693         if (j < n) ncols = j-i;
694         else       ncols = n-i;
695         /* Now assemble all these values with a single function call */
696         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
697 
698         i = j;
699       }
700     }
701     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
702   }
703   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
704   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
705 
706   /* determine if any processor has disassembled, if so we must
707      also disassemble ourselfs, in order that we may reassemble. */
708   /*
709      if nonzero structure of submatrix B cannot change then we know that
710      no processor disassembled thus we can skip this stuff
711   */
712   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
713     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
714     if (mat->was_assembled && !other_disassembled) {
715       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
716     }
717   }
718   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
719     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
720   }
721   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
722   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
723   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
724 
725   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
726 
727   aij->rowvalues = 0;
728 
729   /* used by MatAXPY() */
730   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
731   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
732 
733   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
734   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
735 
736   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
737   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
738     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
739     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
740   }
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
746 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
747 {
748   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
749   PetscErrorCode ierr;
750 
751   PetscFunctionBegin;
752   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
753   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNCT__
758 #define __FUNCT__ "MatZeroRows_MPIAIJ"
759 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
760 {
761   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
762   PetscInt      *owners = A->rmap->range;
763   PetscInt       n      = A->rmap->n;
764   PetscSF        sf;
765   PetscInt      *lrows;
766   PetscSFNode   *rrows;
767   PetscInt       r, p = 0, len = 0;
768   PetscErrorCode ierr;
769 
770   PetscFunctionBegin;
771   /* Create SF where leaves are input rows and roots are owned rows */
772   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
773   for (r = 0; r < n; ++r) lrows[r] = -1;
774   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
775   for (r = 0; r < N; ++r) {
776     const PetscInt idx   = rows[r];
777     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
778     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
779       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
780     }
781     if (A->nooffproczerorows) {
782       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
783       lrows[len++] = idx - owners[p];
784     } else {
785       rrows[r].rank = p;
786       rrows[r].index = rows[r] - owners[p];
787     }
788   }
789   if (!A->nooffproczerorows) {
790     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
791     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
792     /* Collect flags for rows to be zeroed */
793     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
794     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
795     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
796     /* Compress and put in row numbers */
797     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
798   }
799   /* fix right hand side if needed */
800   if (x && b) {
801     const PetscScalar *xx;
802     PetscScalar       *bb;
803 
804     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
805     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
806     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
807     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
808     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
809   }
810   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
811   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
812   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
813     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
814   } else if (diag != 0.0) {
815     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
816     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
817     for (r = 0; r < len; ++r) {
818       const PetscInt row = lrows[r] + A->rmap->rstart;
819       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
820     }
821     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823   } else {
824     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825   }
826   ierr = PetscFree(lrows);CHKERRQ(ierr);
827 
828   /* only change matrix nonzero state if pattern was allowed to be changed */
829   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
830     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
831     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
832   }
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
839 {
840   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode    ierr;
842   PetscMPIInt       n = A->rmap->n;
843   PetscInt          i,j,r,m,p = 0,len = 0;
844   PetscInt          *lrows,*owners = A->rmap->range;
845   PetscSFNode       *rrows;
846   PetscSF           sf;
847   const PetscScalar *xx;
848   PetscScalar       *bb,*mask;
849   Vec               xmask,lmask;
850   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
851   const PetscInt    *aj, *ii,*ridx;
852   PetscScalar       *aa;
853 
854   PetscFunctionBegin;
855   /* Create SF where leaves are input rows and roots are owned rows */
856   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
857   for (r = 0; r < n; ++r) lrows[r] = -1;
858   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
859   for (r = 0; r < N; ++r) {
860     const PetscInt idx   = rows[r];
861     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
862     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
863       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
864     }
865     rrows[r].rank  = p;
866     rrows[r].index = rows[r] - owners[p];
867   }
868   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
869   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
870   /* Collect flags for rows to be zeroed */
871   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
872   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
873   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
874   /* Compress and put in row numbers */
875   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
876   /* zero diagonal part of matrix */
877   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
878   /* handle off diagonal part of matrix */
879   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
880   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
881   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
882   for (i=0; i<len; i++) bb[lrows[i]] = 1;
883   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
884   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
887   if (x) {
888     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
891     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
892   }
893   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
894   /* remove zeroed rows of off diagonal matrix */
895   ii = aij->i;
896   for (i=0; i<len; i++) {
897     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
898   }
899   /* loop over all elements of off process part of matrix zeroing removed columns*/
900   if (aij->compressedrow.use) {
901     m    = aij->compressedrow.nrows;
902     ii   = aij->compressedrow.i;
903     ridx = aij->compressedrow.rindex;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908 
909       for (j=0; j<n; j++) {
910         if (PetscAbsScalar(mask[*aj])) {
911           if (b) bb[*ridx] -= *aa*xx[*aj];
912           *aa = 0.0;
913         }
914         aa++;
915         aj++;
916       }
917       ridx++;
918     }
919   } else { /* do not use compressed row format */
920     m = l->B->rmap->n;
921     for (i=0; i<m; i++) {
922       n  = ii[i+1] - ii[i];
923       aj = aij->j + ii[i];
924       aa = aij->a + ii[i];
925       for (j=0; j<n; j++) {
926         if (PetscAbsScalar(mask[*aj])) {
927           if (b) bb[i] -= *aa*xx[*aj];
928           *aa = 0.0;
929         }
930         aa++;
931         aj++;
932       }
933     }
934   }
935   if (x) {
936     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
937     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
938   }
939   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
940   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
941   ierr = PetscFree(lrows);CHKERRQ(ierr);
942 
943   /* only change matrix nonzero state if pattern was allowed to be changed */
944   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
945     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
946     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatMult_MPIAIJ"
953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
954 {
955   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
956   PetscErrorCode ierr;
957   PetscInt       nt;
958 
959   PetscFunctionBegin;
960   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
961   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
962   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
964   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
965   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
972 {
973   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
974   PetscErrorCode ierr;
975 
976   PetscFunctionBegin;
977   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatMultAdd_MPIAIJ"
983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
991   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
998 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
999 {
1000   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1001   PetscErrorCode ierr;
1002   PetscBool      merged;
1003 
1004   PetscFunctionBegin;
1005   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1006   /* do nondiagonal part */
1007   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1008   if (!merged) {
1009     /* send it on its way */
1010     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1011     /* do local part */
1012     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1013     /* receive remote parts: note this assumes the values are not actually */
1014     /* added in yy until the next line, */
1015     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1016   } else {
1017     /* do local part */
1018     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1019     /* send it on its way */
1020     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1021     /* values actually were received in the Begin() but we need to call this nop */
1022     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1023   }
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1029 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1030 {
1031   MPI_Comm       comm;
1032   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1033   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1034   IS             Me,Notme;
1035   PetscErrorCode ierr;
1036   PetscInt       M,N,first,last,*notme,i;
1037   PetscMPIInt    size;
1038 
1039   PetscFunctionBegin;
1040   /* Easy test: symmetric diagonal block */
1041   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1042   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1043   if (!*f) PetscFunctionReturn(0);
1044   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1045   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1046   if (size == 1) PetscFunctionReturn(0);
1047 
1048   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1049   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1050   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1051   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1052   for (i=0; i<first; i++) notme[i] = i;
1053   for (i=last; i<M; i++) notme[i-last+first] = i;
1054   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1055   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1056   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1057   Aoff = Aoffs[0];
1058   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1059   Boff = Boffs[0];
1060   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1061   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1062   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1063   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1064   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1065   ierr = PetscFree(notme);CHKERRQ(ierr);
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075 
1076   PetscFunctionBegin;
1077   /* do nondiagonal part */
1078   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1079   /* send it on its way */
1080   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   /* do local part */
1082   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1083   /* receive remote parts */
1084   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 /*
1089   This only works correctly for square matrices where the subblock A->A is the
1090    diagonal block
1091 */
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1095 {
1096   PetscErrorCode ierr;
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098 
1099   PetscFunctionBegin;
1100   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1101   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1102   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatScale_MPIAIJ"
1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1115   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatDestroy_Redundant"
1121 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1122 {
1123   PetscErrorCode ierr;
1124   Mat_Redundant  *redund = *redundant;
1125   PetscInt       i;
1126 
1127   PetscFunctionBegin;
1128   *redundant = NULL;
1129   if (redund){
1130     if (redund->matseq) { /* via MatGetSubMatrices()  */
1131       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
1132       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
1133       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
1134       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
1135     } else {
1136       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
1137       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1138       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1139       for (i=0; i<redund->nrecvs; i++) {
1140         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1141         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1142       }
1143       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1144     }
1145 
1146     if (redund->psubcomm) {
1147       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
1148     }
1149     ierr = PetscFree(redund);CHKERRQ(ierr);
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 #undef __FUNCT__
1155 #define __FUNCT__ "MatDestroy_MPIAIJ"
1156 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1157 {
1158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1159   PetscErrorCode ierr;
1160 
1161   PetscFunctionBegin;
1162 #if defined(PETSC_USE_LOG)
1163   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1164 #endif
1165   ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
1166   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1167   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1168   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1169   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1170 #if defined(PETSC_USE_CTABLE)
1171   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1172 #else
1173   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1174 #endif
1175   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1176   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1177   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1178   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1179   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1180   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1181 
1182   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1188   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1189   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1190   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1191 #if defined(PETSC_HAVE_ELEMENTAL)
1192   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1193 #endif
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1199 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1200 {
1201   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1202   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1203   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1204   PetscErrorCode ierr;
1205   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1206   int            fd;
1207   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1208   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1209   PetscScalar    *column_values;
1210   PetscInt       message_count,flowcontrolcount;
1211   FILE           *file;
1212 
1213   PetscFunctionBegin;
1214   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1215   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1216   nz   = A->nz + B->nz;
1217   if (!rank) {
1218     header[0] = MAT_FILE_CLASSID;
1219     header[1] = mat->rmap->N;
1220     header[2] = mat->cmap->N;
1221 
1222     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1223     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1224     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1225     /* get largest number of rows any processor has */
1226     rlen  = mat->rmap->n;
1227     range = mat->rmap->range;
1228     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1229   } else {
1230     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1231     rlen = mat->rmap->n;
1232   }
1233 
1234   /* load up the local row counts */
1235   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1236   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1237 
1238   /* store the row lengths to the file */
1239   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1240   if (!rank) {
1241     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     for (i=1; i<size; i++) {
1243       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1244       rlen = range[i+1] - range[i];
1245       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1246       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1247     }
1248     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1249   } else {
1250     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1251     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1252     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1253   }
1254   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1255 
1256   /* load up the local column indices */
1257   nzmax = nz; /* th processor needs space a largest processor needs */
1258   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1259   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1260   cnt   = 0;
1261   for (i=0; i<mat->rmap->n; i++) {
1262     for (j=B->i[i]; j<B->i[i+1]; j++) {
1263       if ((col = garray[B->j[j]]) > cstart) break;
1264       column_indices[cnt++] = col;
1265     }
1266     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1267     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1268   }
1269   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1270 
1271   /* store the column indices to the file */
1272   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1273   if (!rank) {
1274     MPI_Status status;
1275     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1276     for (i=1; i<size; i++) {
1277       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1278       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1279       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1280       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1281       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1282     }
1283     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1284   } else {
1285     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1286     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1287     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1288     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1289   }
1290   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1291 
1292   /* load up the local column values */
1293   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1294   cnt  = 0;
1295   for (i=0; i<mat->rmap->n; i++) {
1296     for (j=B->i[i]; j<B->i[i+1]; j++) {
1297       if (garray[B->j[j]] > cstart) break;
1298       column_values[cnt++] = B->a[j];
1299     }
1300     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1301     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1302   }
1303   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1304 
1305   /* store the column values to the file */
1306   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1307   if (!rank) {
1308     MPI_Status status;
1309     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1310     for (i=1; i<size; i++) {
1311       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1312       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1313       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1314       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1315       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1316     }
1317     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1318   } else {
1319     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1320     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1321     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1322     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1323   }
1324   ierr = PetscFree(column_values);CHKERRQ(ierr);
1325 
1326   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1327   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 #include <petscdraw.h>
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1334 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1335 {
1336   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1337   PetscErrorCode    ierr;
1338   PetscMPIInt       rank = aij->rank,size = aij->size;
1339   PetscBool         isdraw,iascii,isbinary;
1340   PetscViewer       sviewer;
1341   PetscViewerFormat format;
1342 
1343   PetscFunctionBegin;
1344   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1345   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1346   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1347   if (iascii) {
1348     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1349     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1350       MatInfo   info;
1351       PetscBool inodes;
1352 
1353       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1354       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1355       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1356       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1357       if (!inodes) {
1358         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1359                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1360       } else {
1361         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1362                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1363       }
1364       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1365       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1366       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1367       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1368       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1369       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1370       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1371       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1372       PetscFunctionReturn(0);
1373     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1374       PetscInt inodecount,inodelimit,*inodes;
1375       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1376       if (inodes) {
1377         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1378       } else {
1379         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1380       }
1381       PetscFunctionReturn(0);
1382     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1383       PetscFunctionReturn(0);
1384     }
1385   } else if (isbinary) {
1386     if (size == 1) {
1387       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1388       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1389     } else {
1390       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1391     }
1392     PetscFunctionReturn(0);
1393   } else if (isdraw) {
1394     PetscDraw draw;
1395     PetscBool isnull;
1396     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1397     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1398   }
1399 
1400   {
1401     /* assemble the entire matrix onto first processor. */
1402     Mat        A;
1403     Mat_SeqAIJ *Aloc;
1404     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1405     MatScalar  *a;
1406 
1407     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1408     if (!rank) {
1409       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1410     } else {
1411       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1412     }
1413     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1414     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1415     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1416     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1417     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1418 
1419     /* copy over the A part */
1420     Aloc = (Mat_SeqAIJ*)aij->A->data;
1421     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1422     row  = mat->rmap->rstart;
1423     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1424     for (i=0; i<m; i++) {
1425       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1426       row++;
1427       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1428     }
1429     aj = Aloc->j;
1430     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1431 
1432     /* copy over the B part */
1433     Aloc = (Mat_SeqAIJ*)aij->B->data;
1434     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1435     row  = mat->rmap->rstart;
1436     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1437     ct   = cols;
1438     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1439     for (i=0; i<m; i++) {
1440       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1441       row++;
1442       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1443     }
1444     ierr = PetscFree(ct);CHKERRQ(ierr);
1445     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1446     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1447     /*
1448        Everyone has to call to draw the matrix since the graphics waits are
1449        synchronized across all processors that share the PetscDraw object
1450     */
1451     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1452     if (!rank) {
1453       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1454     }
1455     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1456     ierr = MatDestroy(&A);CHKERRQ(ierr);
1457   }
1458   PetscFunctionReturn(0);
1459 }
1460 
1461 #undef __FUNCT__
1462 #define __FUNCT__ "MatView_MPIAIJ"
1463 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1464 {
1465   PetscErrorCode ierr;
1466   PetscBool      iascii,isdraw,issocket,isbinary;
1467 
1468   PetscFunctionBegin;
1469   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1470   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1471   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1472   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1473   if (iascii || isdraw || isbinary || issocket) {
1474     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1475   }
1476   PetscFunctionReturn(0);
1477 }
1478 
1479 #undef __FUNCT__
1480 #define __FUNCT__ "MatSOR_MPIAIJ"
1481 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1482 {
1483   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1484   PetscErrorCode ierr;
1485   Vec            bb1 = 0;
1486   PetscBool      hasop;
1487 
1488   PetscFunctionBegin;
1489   if (flag == SOR_APPLY_UPPER) {
1490     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1491     PetscFunctionReturn(0);
1492   }
1493 
1494   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1495     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1496   }
1497 
1498   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1499     if (flag & SOR_ZERO_INITIAL_GUESS) {
1500       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1501       its--;
1502     }
1503 
1504     while (its--) {
1505       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1506       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1507 
1508       /* update rhs: bb1 = bb - B*x */
1509       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1510       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1511 
1512       /* local sweep */
1513       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1514     }
1515   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1516     if (flag & SOR_ZERO_INITIAL_GUESS) {
1517       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1518       its--;
1519     }
1520     while (its--) {
1521       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1522       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1523 
1524       /* update rhs: bb1 = bb - B*x */
1525       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1526       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1527 
1528       /* local sweep */
1529       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1530     }
1531   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1532     if (flag & SOR_ZERO_INITIAL_GUESS) {
1533       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1534       its--;
1535     }
1536     while (its--) {
1537       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1538       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1539 
1540       /* update rhs: bb1 = bb - B*x */
1541       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1542       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1543 
1544       /* local sweep */
1545       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1546     }
1547   } else if (flag & SOR_EISENSTAT) {
1548     Vec xx1;
1549 
1550     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1551     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1552 
1553     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1554     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1555     if (!mat->diag) {
1556       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1557       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1558     }
1559     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1560     if (hasop) {
1561       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1562     } else {
1563       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1564     }
1565     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1566 
1567     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1568 
1569     /* local sweep */
1570     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1571     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1572     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1573   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1574 
1575   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatPermute_MPIAIJ"
1581 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1582 {
1583   Mat            aA,aB,Aperm;
1584   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1585   PetscScalar    *aa,*ba;
1586   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1587   PetscSF        rowsf,sf;
1588   IS             parcolp = NULL;
1589   PetscBool      done;
1590   PetscErrorCode ierr;
1591 
1592   PetscFunctionBegin;
1593   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1594   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1595   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1596   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1597 
1598   /* Invert row permutation to find out where my rows should go */
1599   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1600   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1601   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1602   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1603   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1604   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1605 
1606   /* Invert column permutation to find out where my columns should go */
1607   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1608   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1609   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1610   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1611   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1612   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1613   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1614 
1615   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1616   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1617   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1618 
1619   /* Find out where my gcols should go */
1620   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1621   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1622   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1623   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1624   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1625   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1626   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1627   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1628 
1629   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1630   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1631   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1632   for (i=0; i<m; i++) {
1633     PetscInt row = rdest[i],rowner;
1634     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1635     for (j=ai[i]; j<ai[i+1]; j++) {
1636       PetscInt cowner,col = cdest[aj[j]];
1637       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1638       if (rowner == cowner) dnnz[i]++;
1639       else onnz[i]++;
1640     }
1641     for (j=bi[i]; j<bi[i+1]; j++) {
1642       PetscInt cowner,col = gcdest[bj[j]];
1643       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1644       if (rowner == cowner) dnnz[i]++;
1645       else onnz[i]++;
1646     }
1647   }
1648   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1649   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1650   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1651   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1652   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1653 
1654   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1655   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1656   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1657   for (i=0; i<m; i++) {
1658     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1659     PetscInt j0,rowlen;
1660     rowlen = ai[i+1] - ai[i];
1661     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1662       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1663       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1664     }
1665     rowlen = bi[i+1] - bi[i];
1666     for (j0=j=0; j<rowlen; j0=j) {
1667       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1668       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1669     }
1670   }
1671   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1672   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1673   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1674   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1675   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1676   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1677   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1678   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1679   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1680   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1681   *B = Aperm;
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1687 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1688 {
1689   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1690   Mat            A    = mat->A,B = mat->B;
1691   PetscErrorCode ierr;
1692   PetscReal      isend[5],irecv[5];
1693 
1694   PetscFunctionBegin;
1695   info->block_size = 1.0;
1696   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1697 
1698   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1699   isend[3] = info->memory;  isend[4] = info->mallocs;
1700 
1701   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1702 
1703   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1704   isend[3] += info->memory;  isend[4] += info->mallocs;
1705   if (flag == MAT_LOCAL) {
1706     info->nz_used      = isend[0];
1707     info->nz_allocated = isend[1];
1708     info->nz_unneeded  = isend[2];
1709     info->memory       = isend[3];
1710     info->mallocs      = isend[4];
1711   } else if (flag == MAT_GLOBAL_MAX) {
1712     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1713 
1714     info->nz_used      = irecv[0];
1715     info->nz_allocated = irecv[1];
1716     info->nz_unneeded  = irecv[2];
1717     info->memory       = irecv[3];
1718     info->mallocs      = irecv[4];
1719   } else if (flag == MAT_GLOBAL_SUM) {
1720     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1721 
1722     info->nz_used      = irecv[0];
1723     info->nz_allocated = irecv[1];
1724     info->nz_unneeded  = irecv[2];
1725     info->memory       = irecv[3];
1726     info->mallocs      = irecv[4];
1727   }
1728   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1729   info->fill_ratio_needed = 0;
1730   info->factor_mallocs    = 0;
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatSetOption_MPIAIJ"
1736 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1737 {
1738   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1739   PetscErrorCode ierr;
1740 
1741   PetscFunctionBegin;
1742   switch (op) {
1743   case MAT_NEW_NONZERO_LOCATIONS:
1744   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1745   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1746   case MAT_KEEP_NONZERO_PATTERN:
1747   case MAT_NEW_NONZERO_LOCATION_ERR:
1748   case MAT_USE_INODES:
1749   case MAT_IGNORE_ZERO_ENTRIES:
1750     MatCheckPreallocated(A,1);
1751     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1752     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1753     break;
1754   case MAT_ROW_ORIENTED:
1755     a->roworiented = flg;
1756 
1757     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1758     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1759     break;
1760   case MAT_NEW_DIAGONALS:
1761     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1762     break;
1763   case MAT_IGNORE_OFF_PROC_ENTRIES:
1764     a->donotstash = flg;
1765     break;
1766   case MAT_SPD:
1767     A->spd_set = PETSC_TRUE;
1768     A->spd     = flg;
1769     if (flg) {
1770       A->symmetric                  = PETSC_TRUE;
1771       A->structurally_symmetric     = PETSC_TRUE;
1772       A->symmetric_set              = PETSC_TRUE;
1773       A->structurally_symmetric_set = PETSC_TRUE;
1774     }
1775     break;
1776   case MAT_SYMMETRIC:
1777     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1778     break;
1779   case MAT_STRUCTURALLY_SYMMETRIC:
1780     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1781     break;
1782   case MAT_HERMITIAN:
1783     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1784     break;
1785   case MAT_SYMMETRY_ETERNAL:
1786     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1787     break;
1788   default:
1789     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1790   }
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 #undef __FUNCT__
1795 #define __FUNCT__ "MatGetRow_MPIAIJ"
1796 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1797 {
1798   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1799   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1800   PetscErrorCode ierr;
1801   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1802   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1803   PetscInt       *cmap,*idx_p;
1804 
1805   PetscFunctionBegin;
1806   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1807   mat->getrowactive = PETSC_TRUE;
1808 
1809   if (!mat->rowvalues && (idx || v)) {
1810     /*
1811         allocate enough space to hold information from the longest row.
1812     */
1813     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1814     PetscInt   max = 1,tmp;
1815     for (i=0; i<matin->rmap->n; i++) {
1816       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1817       if (max < tmp) max = tmp;
1818     }
1819     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1820   }
1821 
1822   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1823   lrow = row - rstart;
1824 
1825   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1826   if (!v)   {pvA = 0; pvB = 0;}
1827   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1828   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1829   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1830   nztot = nzA + nzB;
1831 
1832   cmap = mat->garray;
1833   if (v  || idx) {
1834     if (nztot) {
1835       /* Sort by increasing column numbers, assuming A and B already sorted */
1836       PetscInt imark = -1;
1837       if (v) {
1838         *v = v_p = mat->rowvalues;
1839         for (i=0; i<nzB; i++) {
1840           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1841           else break;
1842         }
1843         imark = i;
1844         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1845         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1846       }
1847       if (idx) {
1848         *idx = idx_p = mat->rowindices;
1849         if (imark > -1) {
1850           for (i=0; i<imark; i++) {
1851             idx_p[i] = cmap[cworkB[i]];
1852           }
1853         } else {
1854           for (i=0; i<nzB; i++) {
1855             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1856             else break;
1857           }
1858           imark = i;
1859         }
1860         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1861         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1862       }
1863     } else {
1864       if (idx) *idx = 0;
1865       if (v)   *v   = 0;
1866     }
1867   }
1868   *nz  = nztot;
1869   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1870   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1876 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1877 {
1878   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1879 
1880   PetscFunctionBegin;
1881   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1882   aij->getrowactive = PETSC_FALSE;
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatNorm_MPIAIJ"
1888 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1889 {
1890   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1891   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1892   PetscErrorCode ierr;
1893   PetscInt       i,j,cstart = mat->cmap->rstart;
1894   PetscReal      sum = 0.0;
1895   MatScalar      *v;
1896 
1897   PetscFunctionBegin;
1898   if (aij->size == 1) {
1899     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1900   } else {
1901     if (type == NORM_FROBENIUS) {
1902       v = amat->a;
1903       for (i=0; i<amat->nz; i++) {
1904         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1905       }
1906       v = bmat->a;
1907       for (i=0; i<bmat->nz; i++) {
1908         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1909       }
1910       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1911       *norm = PetscSqrtReal(*norm);
1912     } else if (type == NORM_1) { /* max column norm */
1913       PetscReal *tmp,*tmp2;
1914       PetscInt  *jj,*garray = aij->garray;
1915       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1916       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1917       *norm = 0.0;
1918       v     = amat->a; jj = amat->j;
1919       for (j=0; j<amat->nz; j++) {
1920         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1921       }
1922       v = bmat->a; jj = bmat->j;
1923       for (j=0; j<bmat->nz; j++) {
1924         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1925       }
1926       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1927       for (j=0; j<mat->cmap->N; j++) {
1928         if (tmp2[j] > *norm) *norm = tmp2[j];
1929       }
1930       ierr = PetscFree(tmp);CHKERRQ(ierr);
1931       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1932     } else if (type == NORM_INFINITY) { /* max row norm */
1933       PetscReal ntemp = 0.0;
1934       for (j=0; j<aij->A->rmap->n; j++) {
1935         v   = amat->a + amat->i[j];
1936         sum = 0.0;
1937         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1938           sum += PetscAbsScalar(*v); v++;
1939         }
1940         v = bmat->a + bmat->i[j];
1941         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1942           sum += PetscAbsScalar(*v); v++;
1943         }
1944         if (sum > ntemp) ntemp = sum;
1945       }
1946       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1947     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1948   }
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatTranspose_MPIAIJ"
1954 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1955 {
1956   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1957   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1958   PetscErrorCode ierr;
1959   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1960   PetscInt       cstart = A->cmap->rstart,ncol;
1961   Mat            B;
1962   MatScalar      *array;
1963 
1964   PetscFunctionBegin;
1965   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1966 
1967   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1968   ai = Aloc->i; aj = Aloc->j;
1969   bi = Bloc->i; bj = Bloc->j;
1970   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1971     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1972     PetscSFNode          *oloc;
1973     PETSC_UNUSED PetscSF sf;
1974 
1975     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1976     /* compute d_nnz for preallocation */
1977     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1978     for (i=0; i<ai[ma]; i++) {
1979       d_nnz[aj[i]]++;
1980       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1981     }
1982     /* compute local off-diagonal contributions */
1983     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1984     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1985     /* map those to global */
1986     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1987     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1988     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1989     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1990     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1991     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1992     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1993 
1994     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1995     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1996     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1997     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1998     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1999     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2000   } else {
2001     B    = *matout;
2002     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2003     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2004   }
2005 
2006   /* copy over the A part */
2007   array = Aloc->a;
2008   row   = A->rmap->rstart;
2009   for (i=0; i<ma; i++) {
2010     ncol = ai[i+1]-ai[i];
2011     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2012     row++;
2013     array += ncol; aj += ncol;
2014   }
2015   aj = Aloc->j;
2016   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2017 
2018   /* copy over the B part */
2019   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2020   array = Bloc->a;
2021   row   = A->rmap->rstart;
2022   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2023   cols_tmp = cols;
2024   for (i=0; i<mb; i++) {
2025     ncol = bi[i+1]-bi[i];
2026     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2027     row++;
2028     array += ncol; cols_tmp += ncol;
2029   }
2030   ierr = PetscFree(cols);CHKERRQ(ierr);
2031 
2032   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2033   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2034   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2035     *matout = B;
2036   } else {
2037     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2038   }
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 #undef __FUNCT__
2043 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2044 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2045 {
2046   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2047   Mat            a    = aij->A,b = aij->B;
2048   PetscErrorCode ierr;
2049   PetscInt       s1,s2,s3;
2050 
2051   PetscFunctionBegin;
2052   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2053   if (rr) {
2054     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2055     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2056     /* Overlap communication with computation. */
2057     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2058   }
2059   if (ll) {
2060     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2061     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2062     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2063   }
2064   /* scale  the diagonal block */
2065   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2066 
2067   if (rr) {
2068     /* Do a scatter end and then right scale the off-diagonal block */
2069     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2070     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2071   }
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 #undef __FUNCT__
2076 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2077 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2078 {
2079   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2080   PetscErrorCode ierr;
2081 
2082   PetscFunctionBegin;
2083   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatEqual_MPIAIJ"
2089 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2090 {
2091   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2092   Mat            a,b,c,d;
2093   PetscBool      flg;
2094   PetscErrorCode ierr;
2095 
2096   PetscFunctionBegin;
2097   a = matA->A; b = matA->B;
2098   c = matB->A; d = matB->B;
2099 
2100   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2101   if (flg) {
2102     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2103   }
2104   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatCopy_MPIAIJ"
2110 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2111 {
2112   PetscErrorCode ierr;
2113   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2114   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2115 
2116   PetscFunctionBegin;
2117   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2118   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2119     /* because of the column compression in the off-processor part of the matrix a->B,
2120        the number of columns in a->B and b->B may be different, hence we cannot call
2121        the MatCopy() directly on the two parts. If need be, we can provide a more
2122        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2123        then copying the submatrices */
2124     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2125   } else {
2126     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2127     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2128   }
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 #undef __FUNCT__
2133 #define __FUNCT__ "MatSetUp_MPIAIJ"
2134 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2135 {
2136   PetscErrorCode ierr;
2137 
2138   PetscFunctionBegin;
2139   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2140   PetscFunctionReturn(0);
2141 }
2142 
2143 /*
2144    Computes the number of nonzeros per row needed for preallocation when X and Y
2145    have different nonzero structure.
2146 */
2147 #undef __FUNCT__
2148 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2149 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2150 {
2151   PetscInt       i,j,k,nzx,nzy;
2152 
2153   PetscFunctionBegin;
2154   /* Set the number of nonzeros in the new matrix */
2155   for (i=0; i<m; i++) {
2156     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2157     nzx = xi[i+1] - xi[i];
2158     nzy = yi[i+1] - yi[i];
2159     nnz[i] = 0;
2160     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2161       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2162       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2163       nnz[i]++;
2164     }
2165     for (; k<nzy; k++) nnz[i]++;
2166   }
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2171 #undef __FUNCT__
2172 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2173 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2174 {
2175   PetscErrorCode ierr;
2176   PetscInt       m = Y->rmap->N;
2177   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2178   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2179 
2180   PetscFunctionBegin;
2181   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2182   PetscFunctionReturn(0);
2183 }
2184 
2185 #undef __FUNCT__
2186 #define __FUNCT__ "MatAXPY_MPIAIJ"
2187 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2188 {
2189   PetscErrorCode ierr;
2190   PetscInt       i;
2191   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2192   PetscBLASInt   bnz,one=1;
2193   Mat_SeqAIJ     *x,*y;
2194 
2195   PetscFunctionBegin;
2196   if (str == SAME_NONZERO_PATTERN) {
2197     PetscScalar alpha = a;
2198     x    = (Mat_SeqAIJ*)xx->A->data;
2199     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2200     y    = (Mat_SeqAIJ*)yy->A->data;
2201     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2202     x    = (Mat_SeqAIJ*)xx->B->data;
2203     y    = (Mat_SeqAIJ*)yy->B->data;
2204     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2205     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2206     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2207   } else if (str == SUBSET_NONZERO_PATTERN) {
2208     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2209 
2210     x = (Mat_SeqAIJ*)xx->B->data;
2211     y = (Mat_SeqAIJ*)yy->B->data;
2212     if (y->xtoy && y->XtoY != xx->B) {
2213       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2214       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2215     }
2216     if (!y->xtoy) { /* get xtoy */
2217       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2218       y->XtoY = xx->B;
2219       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2220     }
2221     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2222     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2223   } else {
2224     Mat      B;
2225     PetscInt *nnz_d,*nnz_o;
2226     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2227     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2228     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2229     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2230     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2231     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2232     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2233     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2234     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2235     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2236     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2237     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2238     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2239     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2240   }
2241   PetscFunctionReturn(0);
2242 }
2243 
2244 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2245 
2246 #undef __FUNCT__
2247 #define __FUNCT__ "MatConjugate_MPIAIJ"
2248 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2249 {
2250 #if defined(PETSC_USE_COMPLEX)
2251   PetscErrorCode ierr;
2252   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2253 
2254   PetscFunctionBegin;
2255   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2256   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2257 #else
2258   PetscFunctionBegin;
2259 #endif
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 #undef __FUNCT__
2264 #define __FUNCT__ "MatRealPart_MPIAIJ"
2265 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2266 {
2267   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2268   PetscErrorCode ierr;
2269 
2270   PetscFunctionBegin;
2271   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2272   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 #undef __FUNCT__
2277 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2278 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2279 {
2280   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2281   PetscErrorCode ierr;
2282 
2283   PetscFunctionBegin;
2284   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2285   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 #if defined(PETSC_HAVE_PBGL)
2290 
2291 #include <boost/parallel/mpi/bsp_process_group.hpp>
2292 #include <boost/graph/distributed/ilu_default_graph.hpp>
2293 #include <boost/graph/distributed/ilu_0_block.hpp>
2294 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2295 #include <boost/graph/distributed/petsc/interface.hpp>
2296 #include <boost/multi_array.hpp>
2297 #include <boost/parallel/distributed_property_map->hpp>
2298 
2299 #undef __FUNCT__
2300 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2301 /*
2302   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2303 */
2304 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2305 {
2306   namespace petsc = boost::distributed::petsc;
2307 
2308   namespace graph_dist = boost::graph::distributed;
2309   using boost::graph::distributed::ilu_default::process_group_type;
2310   using boost::graph::ilu_permuted;
2311 
2312   PetscBool      row_identity, col_identity;
2313   PetscContainer c;
2314   PetscInt       m, n, M, N;
2315   PetscErrorCode ierr;
2316 
2317   PetscFunctionBegin;
2318   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2319   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2320   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2321   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2322 
2323   process_group_type pg;
2324   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2325   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2326   lgraph_type& level_graph = *lgraph_p;
2327   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2328 
2329   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2330   ilu_permuted(level_graph);
2331 
2332   /* put together the new matrix */
2333   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2334   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2335   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2336   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2337   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2338   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2339   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2340   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2341 
2342   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2343   ierr = PetscContainerSetPointer(c, lgraph_p);
2344   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2345   ierr = PetscContainerDestroy(&c);
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2351 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2352 {
2353   PetscFunctionBegin;
2354   PetscFunctionReturn(0);
2355 }
2356 
2357 #undef __FUNCT__
2358 #define __FUNCT__ "MatSolve_MPIAIJ"
2359 /*
2360   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2361 */
2362 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2363 {
2364   namespace graph_dist = boost::graph::distributed;
2365 
2366   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2367   lgraph_type    *lgraph_p;
2368   PetscContainer c;
2369   PetscErrorCode ierr;
2370 
2371   PetscFunctionBegin;
2372   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2373   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2374   ierr = VecCopy(b, x);CHKERRQ(ierr);
2375 
2376   PetscScalar *array_x;
2377   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2378   PetscInt sx;
2379   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2380 
2381   PetscScalar *array_b;
2382   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2383   PetscInt sb;
2384   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2385 
2386   lgraph_type& level_graph = *lgraph_p;
2387   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2388 
2389   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2390   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2391   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2392 
2393   typedef boost::iterator_property_map<array_ref_type::iterator,
2394                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2395   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2396   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2397 
2398   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2399   PetscFunctionReturn(0);
2400 }
2401 #endif
2402 
2403 
2404 #undef __FUNCT__
2405 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2406 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2407 {
2408   PetscMPIInt    rank,size;
2409   MPI_Comm       comm;
2410   PetscErrorCode ierr;
2411   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2412   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2413   PetscInt       *rowrange = mat->rmap->range;
2414   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2415   Mat            A = aij->A,B=aij->B,C=*matredundant;
2416   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2417   PetscScalar    *sbuf_a;
2418   PetscInt       nzlocal=a->nz+b->nz;
2419   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2420   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2421   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2422   MatScalar      *aworkA,*aworkB;
2423   PetscScalar    *vals;
2424   PetscMPIInt    tag1,tag2,tag3,imdex;
2425   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2426   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2427   MPI_Status     recv_status,*send_status;
2428   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2429   PetscInt       **rbuf_j=NULL;
2430   PetscScalar    **rbuf_a=NULL;
2431   Mat_Redundant  *redund =NULL;
2432 
2433   PetscFunctionBegin;
2434   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2435   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2436   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2437   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2438   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2439 
2440   if (reuse == MAT_REUSE_MATRIX) {
2441     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2442     if (subsize == 1) {
2443       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2444       redund = c->redundant;
2445     } else {
2446       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2447       redund = c->redundant;
2448     }
2449     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2450 
2451     nsends    = redund->nsends;
2452     nrecvs    = redund->nrecvs;
2453     send_rank = redund->send_rank;
2454     recv_rank = redund->recv_rank;
2455     sbuf_nz   = redund->sbuf_nz;
2456     rbuf_nz   = redund->rbuf_nz;
2457     sbuf_j    = redund->sbuf_j;
2458     sbuf_a    = redund->sbuf_a;
2459     rbuf_j    = redund->rbuf_j;
2460     rbuf_a    = redund->rbuf_a;
2461   }
2462 
2463   if (reuse == MAT_INITIAL_MATRIX) {
2464     PetscInt    nleftover,np_subcomm;
2465 
2466     /* get the destination processors' id send_rank, nsends and nrecvs */
2467     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2468 
2469     np_subcomm = size/nsubcomm;
2470     nleftover  = size - nsubcomm*np_subcomm;
2471 
2472     /* block of codes below is specific for INTERLACED */
2473     /* ------------------------------------------------*/
2474     nsends = 0; nrecvs = 0;
2475     for (i=0; i<size; i++) {
2476       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2477         send_rank[nsends++] = i;
2478         recv_rank[nrecvs++] = i;
2479       }
2480     }
2481     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2482       i = size-nleftover-1;
2483       j = 0;
2484       while (j < nsubcomm - nleftover) {
2485         send_rank[nsends++] = i;
2486         i--; j++;
2487       }
2488     }
2489 
2490     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2491       for (i=0; i<nleftover; i++) {
2492         recv_rank[nrecvs++] = size-nleftover+i;
2493       }
2494     }
2495     /*----------------------------------------------*/
2496 
2497     /* allocate sbuf_j, sbuf_a */
2498     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2499     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2500     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2501     /*
2502     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2503     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2504      */
2505   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2506 
2507   /* copy mat's local entries into the buffers */
2508   if (reuse == MAT_INITIAL_MATRIX) {
2509     rownz_max = 0;
2510     rptr      = sbuf_j;
2511     cols      = sbuf_j + rend-rstart + 1;
2512     vals      = sbuf_a;
2513     rptr[0]   = 0;
2514     for (i=0; i<rend-rstart; i++) {
2515       row    = i + rstart;
2516       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2517       ncols  = nzA + nzB;
2518       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2519       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2520       /* load the column indices for this row into cols */
2521       lwrite = 0;
2522       for (l=0; l<nzB; l++) {
2523         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2524           vals[lwrite]   = aworkB[l];
2525           cols[lwrite++] = ctmp;
2526         }
2527       }
2528       for (l=0; l<nzA; l++) {
2529         vals[lwrite]   = aworkA[l];
2530         cols[lwrite++] = cstart + cworkA[l];
2531       }
2532       for (l=0; l<nzB; l++) {
2533         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2534           vals[lwrite]   = aworkB[l];
2535           cols[lwrite++] = ctmp;
2536         }
2537       }
2538       vals     += ncols;
2539       cols     += ncols;
2540       rptr[i+1] = rptr[i] + ncols;
2541       if (rownz_max < ncols) rownz_max = ncols;
2542     }
2543     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2544   } else { /* only copy matrix values into sbuf_a */
2545     rptr    = sbuf_j;
2546     vals    = sbuf_a;
2547     rptr[0] = 0;
2548     for (i=0; i<rend-rstart; i++) {
2549       row    = i + rstart;
2550       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2551       ncols  = nzA + nzB;
2552       cworkB = b->j + b->i[i];
2553       aworkA = a->a + a->i[i];
2554       aworkB = b->a + b->i[i];
2555       lwrite = 0;
2556       for (l=0; l<nzB; l++) {
2557         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2558       }
2559       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2560       for (l=0; l<nzB; l++) {
2561         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2562       }
2563       vals     += ncols;
2564       rptr[i+1] = rptr[i] + ncols;
2565     }
2566   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2567 
2568   /* send nzlocal to others, and recv other's nzlocal */
2569   /*--------------------------------------------------*/
2570   if (reuse == MAT_INITIAL_MATRIX) {
2571     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2572 
2573     s_waits2 = s_waits3 + nsends;
2574     s_waits1 = s_waits2 + nsends;
2575     r_waits1 = s_waits1 + nsends;
2576     r_waits2 = r_waits1 + nrecvs;
2577     r_waits3 = r_waits2 + nrecvs;
2578   } else {
2579     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2580 
2581     r_waits3 = s_waits3 + nsends;
2582   }
2583 
2584   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2585   if (reuse == MAT_INITIAL_MATRIX) {
2586     /* get new tags to keep the communication clean */
2587     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2588     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2589     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2590 
2591     /* post receives of other's nzlocal */
2592     for (i=0; i<nrecvs; i++) {
2593       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2594     }
2595     /* send nzlocal to others */
2596     for (i=0; i<nsends; i++) {
2597       sbuf_nz[i] = nzlocal;
2598       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2599     }
2600     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2601     count = nrecvs;
2602     while (count) {
2603       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2604 
2605       recv_rank[imdex] = recv_status.MPI_SOURCE;
2606       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2607       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2608 
2609       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2610 
2611       rbuf_nz[imdex] += i + 2;
2612 
2613       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2614       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2615       count--;
2616     }
2617     /* wait on sends of nzlocal */
2618     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2619     /* send mat->i,j to others, and recv from other's */
2620     /*------------------------------------------------*/
2621     for (i=0; i<nsends; i++) {
2622       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2623       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2624     }
2625     /* wait on receives of mat->i,j */
2626     /*------------------------------*/
2627     count = nrecvs;
2628     while (count) {
2629       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2630       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2631       count--;
2632     }
2633     /* wait on sends of mat->i,j */
2634     /*---------------------------*/
2635     if (nsends) {
2636       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2637     }
2638   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2639 
2640   /* post receives, send and receive mat->a */
2641   /*----------------------------------------*/
2642   for (imdex=0; imdex<nrecvs; imdex++) {
2643     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2644   }
2645   for (i=0; i<nsends; i++) {
2646     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2647   }
2648   count = nrecvs;
2649   while (count) {
2650     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2651     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2652     count--;
2653   }
2654   if (nsends) {
2655     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2656   }
2657 
2658   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2659 
2660   /* create redundant matrix */
2661   /*-------------------------*/
2662   if (reuse == MAT_INITIAL_MATRIX) {
2663     const PetscInt *range;
2664     PetscInt       rstart_sub,rend_sub,mloc_sub;
2665 
2666     /* compute rownz_max for preallocation */
2667     for (imdex=0; imdex<nrecvs; imdex++) {
2668       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2669       rptr = rbuf_j[imdex];
2670       for (i=0; i<j; i++) {
2671         ncols = rptr[i+1] - rptr[i];
2672         if (rownz_max < ncols) rownz_max = ncols;
2673       }
2674     }
2675 
2676     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2677 
2678     /* get local size of redundant matrix
2679        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */
2680     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2681     rstart_sub = range[nsubcomm*subrank];
2682     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2683       rend_sub = range[nsubcomm*(subrank+1)];
2684     } else {
2685       rend_sub = mat->rmap->N;
2686     }
2687     mloc_sub = rend_sub - rstart_sub;
2688 
2689     if (M == N) {
2690       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2691     } else { /* non-square matrix */
2692       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2693     }
2694     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2695     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2696     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2697     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2698   } else {
2699     C = *matredundant;
2700   }
2701 
2702   /* insert local matrix entries */
2703   rptr = sbuf_j;
2704   cols = sbuf_j + rend-rstart + 1;
2705   vals = sbuf_a;
2706   for (i=0; i<rend-rstart; i++) {
2707     row   = i + rstart;
2708     ncols = rptr[i+1] - rptr[i];
2709     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2710     vals += ncols;
2711     cols += ncols;
2712   }
2713   /* insert received matrix entries */
2714   for (imdex=0; imdex<nrecvs; imdex++) {
2715     rstart = rowrange[recv_rank[imdex]];
2716     rend   = rowrange[recv_rank[imdex]+1];
2717     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2718     rptr   = rbuf_j[imdex];
2719     cols   = rbuf_j[imdex] + rend-rstart + 1;
2720     vals   = rbuf_a[imdex];
2721     for (i=0; i<rend-rstart; i++) {
2722       row   = i + rstart;
2723       ncols = rptr[i+1] - rptr[i];
2724       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2725       vals += ncols;
2726       cols += ncols;
2727     }
2728   }
2729   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2730   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2731 
2732   if (reuse == MAT_INITIAL_MATRIX) {
2733     *matredundant = C;
2734 
2735     /* create a supporting struct and attach it to C for reuse */
2736     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2737     if (subsize == 1) {
2738       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2739       c->redundant = redund;
2740     } else {
2741       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2742       c->redundant = redund;
2743     }
2744 
2745     redund->nzlocal   = nzlocal;
2746     redund->nsends    = nsends;
2747     redund->nrecvs    = nrecvs;
2748     redund->send_rank = send_rank;
2749     redund->recv_rank = recv_rank;
2750     redund->sbuf_nz   = sbuf_nz;
2751     redund->rbuf_nz   = rbuf_nz;
2752     redund->sbuf_j    = sbuf_j;
2753     redund->sbuf_a    = sbuf_a;
2754     redund->rbuf_j    = rbuf_j;
2755     redund->rbuf_a    = rbuf_a;
2756     redund->psubcomm  = NULL;
2757   }
2758   PetscFunctionReturn(0);
2759 }
2760 
2761 #undef __FUNCT__
2762 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2763 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2764 {
2765   PetscErrorCode ierr;
2766   MPI_Comm       comm;
2767   PetscMPIInt    size,subsize;
2768   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2769   Mat_Redundant  *redund=NULL;
2770   PetscSubcomm   psubcomm=NULL;
2771   MPI_Comm       subcomm_in=subcomm;
2772   Mat            *matseq;
2773   IS             isrow,iscol;
2774 
2775   PetscFunctionBegin;
2776   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2777     if (reuse ==  MAT_INITIAL_MATRIX) {
2778       /* create psubcomm, then get subcomm */
2779       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2780       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2781       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size);
2782 
2783       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2784       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2785       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2786       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2787       subcomm = psubcomm->comm;
2788     } else { /* retrieve psubcomm and subcomm */
2789       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2790       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2791       if (subsize == 1) {
2792         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2793         redund = c->redundant;
2794       } else {
2795         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2796         redund = c->redundant;
2797       }
2798       psubcomm = redund->psubcomm;
2799     }
2800     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2801       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2802       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
2803         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2804         if (subsize == 1) {
2805           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2806           c->redundant->psubcomm = psubcomm;
2807         } else {
2808           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2809           c->redundant->psubcomm = psubcomm ;
2810         }
2811       }
2812       PetscFunctionReturn(0);
2813     }
2814   }
2815 
2816   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2817   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2818   if (reuse == MAT_INITIAL_MATRIX) {
2819     /* create a local sequential matrix matseq[0] */
2820     mloc_sub = PETSC_DECIDE;
2821     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2822     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2823     rstart = rend - mloc_sub;
2824     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2825     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2826   } else { /* reuse == MAT_REUSE_MATRIX */
2827     if (subsize == 1) {
2828       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2829       redund = c->redundant;
2830     } else {
2831       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2832       redund = c->redundant;
2833     }
2834 
2835     isrow  = redund->isrow;
2836     iscol  = redund->iscol;
2837     matseq = redund->matseq;
2838   }
2839   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2840   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2841 
2842   if (reuse == MAT_INITIAL_MATRIX) {
2843     /* create a supporting struct and attach it to C for reuse */
2844     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2845     if (subsize == 1) {
2846       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2847       c->redundant = redund;
2848     } else {
2849       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2850       c->redundant = redund;
2851     }
2852     redund->isrow    = isrow;
2853     redund->iscol    = iscol;
2854     redund->matseq   = matseq;
2855     redund->psubcomm = psubcomm;
2856   }
2857   PetscFunctionReturn(0);
2858 }
2859 
2860 #undef __FUNCT__
2861 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2862 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2863 {
2864   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2865   PetscErrorCode ierr;
2866   PetscInt       i,*idxb = 0;
2867   PetscScalar    *va,*vb;
2868   Vec            vtmp;
2869 
2870   PetscFunctionBegin;
2871   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2872   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2873   if (idx) {
2874     for (i=0; i<A->rmap->n; i++) {
2875       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2876     }
2877   }
2878 
2879   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2880   if (idx) {
2881     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2882   }
2883   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2884   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2885 
2886   for (i=0; i<A->rmap->n; i++) {
2887     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2888       va[i] = vb[i];
2889       if (idx) idx[i] = a->garray[idxb[i]];
2890     }
2891   }
2892 
2893   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2894   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2895   ierr = PetscFree(idxb);CHKERRQ(ierr);
2896   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2897   PetscFunctionReturn(0);
2898 }
2899 
2900 #undef __FUNCT__
2901 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2902 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2903 {
2904   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2905   PetscErrorCode ierr;
2906   PetscInt       i,*idxb = 0;
2907   PetscScalar    *va,*vb;
2908   Vec            vtmp;
2909 
2910   PetscFunctionBegin;
2911   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2912   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2913   if (idx) {
2914     for (i=0; i<A->cmap->n; i++) {
2915       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2916     }
2917   }
2918 
2919   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2920   if (idx) {
2921     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2922   }
2923   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2924   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2925 
2926   for (i=0; i<A->rmap->n; i++) {
2927     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2928       va[i] = vb[i];
2929       if (idx) idx[i] = a->garray[idxb[i]];
2930     }
2931   }
2932 
2933   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2934   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2935   ierr = PetscFree(idxb);CHKERRQ(ierr);
2936   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2937   PetscFunctionReturn(0);
2938 }
2939 
2940 #undef __FUNCT__
2941 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2942 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2943 {
2944   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2945   PetscInt       n      = A->rmap->n;
2946   PetscInt       cstart = A->cmap->rstart;
2947   PetscInt       *cmap  = mat->garray;
2948   PetscInt       *diagIdx, *offdiagIdx;
2949   Vec            diagV, offdiagV;
2950   PetscScalar    *a, *diagA, *offdiagA;
2951   PetscInt       r;
2952   PetscErrorCode ierr;
2953 
2954   PetscFunctionBegin;
2955   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2956   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2957   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2958   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2959   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2960   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2961   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2962   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2963   for (r = 0; r < n; ++r) {
2964     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2965       a[r]   = diagA[r];
2966       idx[r] = cstart + diagIdx[r];
2967     } else {
2968       a[r]   = offdiagA[r];
2969       idx[r] = cmap[offdiagIdx[r]];
2970     }
2971   }
2972   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2973   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2974   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2975   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2976   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2977   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2978   PetscFunctionReturn(0);
2979 }
2980 
2981 #undef __FUNCT__
2982 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2983 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2984 {
2985   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2986   PetscInt       n      = A->rmap->n;
2987   PetscInt       cstart = A->cmap->rstart;
2988   PetscInt       *cmap  = mat->garray;
2989   PetscInt       *diagIdx, *offdiagIdx;
2990   Vec            diagV, offdiagV;
2991   PetscScalar    *a, *diagA, *offdiagA;
2992   PetscInt       r;
2993   PetscErrorCode ierr;
2994 
2995   PetscFunctionBegin;
2996   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2997   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2998   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2999   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3000   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3001   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3002   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3003   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3004   for (r = 0; r < n; ++r) {
3005     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3006       a[r]   = diagA[r];
3007       idx[r] = cstart + diagIdx[r];
3008     } else {
3009       a[r]   = offdiagA[r];
3010       idx[r] = cmap[offdiagIdx[r]];
3011     }
3012   }
3013   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3014   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3015   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3016   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3017   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3018   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3019   PetscFunctionReturn(0);
3020 }
3021 
3022 #undef __FUNCT__
3023 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3024 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3025 {
3026   PetscErrorCode ierr;
3027   Mat            *dummy;
3028 
3029   PetscFunctionBegin;
3030   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3031   *newmat = *dummy;
3032   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3033   PetscFunctionReturn(0);
3034 }
3035 
3036 #undef __FUNCT__
3037 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3038 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3039 {
3040   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3041   PetscErrorCode ierr;
3042 
3043   PetscFunctionBegin;
3044   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3045   PetscFunctionReturn(0);
3046 }
3047 
3048 #undef __FUNCT__
3049 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3050 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3051 {
3052   PetscErrorCode ierr;
3053   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3054 
3055   PetscFunctionBegin;
3056   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3057   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3058   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3059   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3060   PetscFunctionReturn(0);
3061 }
3062 
3063 /* -------------------------------------------------------------------*/
3064 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3065                                        MatGetRow_MPIAIJ,
3066                                        MatRestoreRow_MPIAIJ,
3067                                        MatMult_MPIAIJ,
3068                                 /* 4*/ MatMultAdd_MPIAIJ,
3069                                        MatMultTranspose_MPIAIJ,
3070                                        MatMultTransposeAdd_MPIAIJ,
3071 #if defined(PETSC_HAVE_PBGL)
3072                                        MatSolve_MPIAIJ,
3073 #else
3074                                        0,
3075 #endif
3076                                        0,
3077                                        0,
3078                                 /*10*/ 0,
3079                                        0,
3080                                        0,
3081                                        MatSOR_MPIAIJ,
3082                                        MatTranspose_MPIAIJ,
3083                                 /*15*/ MatGetInfo_MPIAIJ,
3084                                        MatEqual_MPIAIJ,
3085                                        MatGetDiagonal_MPIAIJ,
3086                                        MatDiagonalScale_MPIAIJ,
3087                                        MatNorm_MPIAIJ,
3088                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3089                                        MatAssemblyEnd_MPIAIJ,
3090                                        MatSetOption_MPIAIJ,
3091                                        MatZeroEntries_MPIAIJ,
3092                                 /*24*/ MatZeroRows_MPIAIJ,
3093                                        0,
3094 #if defined(PETSC_HAVE_PBGL)
3095                                        0,
3096 #else
3097                                        0,
3098 #endif
3099                                        0,
3100                                        0,
3101                                 /*29*/ MatSetUp_MPIAIJ,
3102 #if defined(PETSC_HAVE_PBGL)
3103                                        0,
3104 #else
3105                                        0,
3106 #endif
3107                                        0,
3108                                        0,
3109                                        0,
3110                                 /*34*/ MatDuplicate_MPIAIJ,
3111                                        0,
3112                                        0,
3113                                        0,
3114                                        0,
3115                                 /*39*/ MatAXPY_MPIAIJ,
3116                                        MatGetSubMatrices_MPIAIJ,
3117                                        MatIncreaseOverlap_MPIAIJ,
3118                                        MatGetValues_MPIAIJ,
3119                                        MatCopy_MPIAIJ,
3120                                 /*44*/ MatGetRowMax_MPIAIJ,
3121                                        MatScale_MPIAIJ,
3122                                        0,
3123                                        MatDiagonalSet_MPIAIJ,
3124                                        MatZeroRowsColumns_MPIAIJ,
3125                                 /*49*/ MatSetRandom_MPIAIJ,
3126                                        0,
3127                                        0,
3128                                        0,
3129                                        0,
3130                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3131                                        0,
3132                                        MatSetUnfactored_MPIAIJ,
3133                                        MatPermute_MPIAIJ,
3134                                        0,
3135                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3136                                        MatDestroy_MPIAIJ,
3137                                        MatView_MPIAIJ,
3138                                        0,
3139                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3140                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3141                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3142                                        0,
3143                                        0,
3144                                        0,
3145                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3146                                        MatGetRowMinAbs_MPIAIJ,
3147                                        0,
3148                                        MatSetColoring_MPIAIJ,
3149                                        0,
3150                                        MatSetValuesAdifor_MPIAIJ,
3151                                 /*75*/ MatFDColoringApply_AIJ,
3152                                        0,
3153                                        0,
3154                                        0,
3155                                        MatFindZeroDiagonals_MPIAIJ,
3156                                 /*80*/ 0,
3157                                        0,
3158                                        0,
3159                                 /*83*/ MatLoad_MPIAIJ,
3160                                        0,
3161                                        0,
3162                                        0,
3163                                        0,
3164                                        0,
3165                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3166                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3167                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3168                                        MatPtAP_MPIAIJ_MPIAIJ,
3169                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3170                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3171                                        0,
3172                                        0,
3173                                        0,
3174                                        0,
3175                                 /*99*/ 0,
3176                                        0,
3177                                        0,
3178                                        MatConjugate_MPIAIJ,
3179                                        0,
3180                                 /*104*/MatSetValuesRow_MPIAIJ,
3181                                        MatRealPart_MPIAIJ,
3182                                        MatImaginaryPart_MPIAIJ,
3183                                        0,
3184                                        0,
3185                                 /*109*/0,
3186                                        MatGetRedundantMatrix_MPIAIJ,
3187                                        MatGetRowMin_MPIAIJ,
3188                                        0,
3189                                        0,
3190                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3191                                        0,
3192                                        0,
3193                                        0,
3194                                        0,
3195                                 /*119*/0,
3196                                        0,
3197                                        0,
3198                                        0,
3199                                        MatGetMultiProcBlock_MPIAIJ,
3200                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3201                                        MatGetColumnNorms_MPIAIJ,
3202                                        MatInvertBlockDiagonal_MPIAIJ,
3203                                        0,
3204                                        MatGetSubMatricesParallel_MPIAIJ,
3205                                 /*129*/0,
3206                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3207                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3208                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3209                                        0,
3210                                 /*134*/0,
3211                                        0,
3212                                        0,
3213                                        0,
3214                                        0,
3215                                 /*139*/0,
3216                                        0,
3217                                        0,
3218                                        MatFDColoringSetUp_MPIXAIJ
3219 };
3220 
3221 /* ----------------------------------------------------------------------------------------*/
3222 
3223 #undef __FUNCT__
3224 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3225 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3226 {
3227   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3228   PetscErrorCode ierr;
3229 
3230   PetscFunctionBegin;
3231   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3232   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3233   PetscFunctionReturn(0);
3234 }
3235 
3236 #undef __FUNCT__
3237 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3238 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3239 {
3240   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3241   PetscErrorCode ierr;
3242 
3243   PetscFunctionBegin;
3244   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3245   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3246   PetscFunctionReturn(0);
3247 }
3248 
3249 #undef __FUNCT__
3250 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3251 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3252 {
3253   Mat_MPIAIJ     *b;
3254   PetscErrorCode ierr;
3255 
3256   PetscFunctionBegin;
3257   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3258   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3259   b = (Mat_MPIAIJ*)B->data;
3260 
3261   if (!B->preallocated) {
3262     /* Explicitly create 2 MATSEQAIJ matrices. */
3263     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3264     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3265     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3266     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3267     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3268     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3269     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3270     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3271     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3272     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3273   }
3274 
3275   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3276   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3277   B->preallocated = PETSC_TRUE;
3278   PetscFunctionReturn(0);
3279 }
3280 
3281 #undef __FUNCT__
3282 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3283 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3284 {
3285   Mat            mat;
3286   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3287   PetscErrorCode ierr;
3288 
3289   PetscFunctionBegin;
3290   *newmat = 0;
3291   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3292   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3293   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3294   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3295   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3296   a       = (Mat_MPIAIJ*)mat->data;
3297 
3298   mat->factortype   = matin->factortype;
3299   mat->assembled    = PETSC_TRUE;
3300   mat->insertmode   = NOT_SET_VALUES;
3301   mat->preallocated = PETSC_TRUE;
3302 
3303   a->size         = oldmat->size;
3304   a->rank         = oldmat->rank;
3305   a->donotstash   = oldmat->donotstash;
3306   a->roworiented  = oldmat->roworiented;
3307   a->rowindices   = 0;
3308   a->rowvalues    = 0;
3309   a->getrowactive = PETSC_FALSE;
3310 
3311   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3312   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3313 
3314   if (oldmat->colmap) {
3315 #if defined(PETSC_USE_CTABLE)
3316     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3317 #else
3318     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3319     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3320     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3321 #endif
3322   } else a->colmap = 0;
3323   if (oldmat->garray) {
3324     PetscInt len;
3325     len  = oldmat->B->cmap->n;
3326     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3327     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3328     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3329   } else a->garray = 0;
3330 
3331   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3332   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3333   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3334   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3335   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3336   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3337   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3338   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3339   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3340   *newmat = mat;
3341   PetscFunctionReturn(0);
3342 }
3343 
3344 
3345 
3346 #undef __FUNCT__
3347 #define __FUNCT__ "MatLoad_MPIAIJ"
3348 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3349 {
3350   PetscScalar    *vals,*svals;
3351   MPI_Comm       comm;
3352   PetscErrorCode ierr;
3353   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3354   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3355   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3356   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3357   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3358   int            fd;
3359   PetscInt       bs = 1;
3360 
3361   PetscFunctionBegin;
3362   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3363   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3364   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3365   if (!rank) {
3366     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3367     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3368     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3369   }
3370 
3371   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3372   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3373   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3374 
3375   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3376 
3377   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3378   M    = header[1]; N = header[2];
3379   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3380   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3381   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3382 
3383   /* If global sizes are set, check if they are consistent with that given in the file */
3384   if (sizesset) {
3385     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3386   }
3387   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3388   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3389 
3390   /* determine ownership of all (block) rows */
3391   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3392   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3393   else m = newMat->rmap->n; /* Set by user */
3394 
3395   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3396   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3397 
3398   /* First process needs enough room for process with most rows */
3399   if (!rank) {
3400     mmax = rowners[1];
3401     for (i=2; i<=size; i++) {
3402       mmax = PetscMax(mmax, rowners[i]);
3403     }
3404   } else mmax = -1;             /* unused, but compilers complain */
3405 
3406   rowners[0] = 0;
3407   for (i=2; i<=size; i++) {
3408     rowners[i] += rowners[i-1];
3409   }
3410   rstart = rowners[rank];
3411   rend   = rowners[rank+1];
3412 
3413   /* distribute row lengths to all processors */
3414   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3415   if (!rank) {
3416     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3417     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3418     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3419     for (j=0; j<m; j++) {
3420       procsnz[0] += ourlens[j];
3421     }
3422     for (i=1; i<size; i++) {
3423       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3424       /* calculate the number of nonzeros on each processor */
3425       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3426         procsnz[i] += rowlengths[j];
3427       }
3428       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3429     }
3430     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3431   } else {
3432     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3433   }
3434 
3435   if (!rank) {
3436     /* determine max buffer needed and allocate it */
3437     maxnz = 0;
3438     for (i=0; i<size; i++) {
3439       maxnz = PetscMax(maxnz,procsnz[i]);
3440     }
3441     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3442 
3443     /* read in my part of the matrix column indices  */
3444     nz   = procsnz[0];
3445     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3446     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3447 
3448     /* read in every one elses and ship off */
3449     for (i=1; i<size; i++) {
3450       nz   = procsnz[i];
3451       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3452       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3453     }
3454     ierr = PetscFree(cols);CHKERRQ(ierr);
3455   } else {
3456     /* determine buffer space needed for message */
3457     nz = 0;
3458     for (i=0; i<m; i++) {
3459       nz += ourlens[i];
3460     }
3461     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3462 
3463     /* receive message of column indices*/
3464     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3465   }
3466 
3467   /* determine column ownership if matrix is not square */
3468   if (N != M) {
3469     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3470     else n = newMat->cmap->n;
3471     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3472     cstart = cend - n;
3473   } else {
3474     cstart = rstart;
3475     cend   = rend;
3476     n      = cend - cstart;
3477   }
3478 
3479   /* loop over local rows, determining number of off diagonal entries */
3480   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3481   jj   = 0;
3482   for (i=0; i<m; i++) {
3483     for (j=0; j<ourlens[i]; j++) {
3484       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3485       jj++;
3486     }
3487   }
3488 
3489   for (i=0; i<m; i++) {
3490     ourlens[i] -= offlens[i];
3491   }
3492   if (!sizesset) {
3493     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3494   }
3495 
3496   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3497 
3498   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3499 
3500   for (i=0; i<m; i++) {
3501     ourlens[i] += offlens[i];
3502   }
3503 
3504   if (!rank) {
3505     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3506 
3507     /* read in my part of the matrix numerical values  */
3508     nz   = procsnz[0];
3509     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3510 
3511     /* insert into matrix */
3512     jj      = rstart;
3513     smycols = mycols;
3514     svals   = vals;
3515     for (i=0; i<m; i++) {
3516       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3517       smycols += ourlens[i];
3518       svals   += ourlens[i];
3519       jj++;
3520     }
3521 
3522     /* read in other processors and ship out */
3523     for (i=1; i<size; i++) {
3524       nz   = procsnz[i];
3525       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3526       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3527     }
3528     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3529   } else {
3530     /* receive numeric values */
3531     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3532 
3533     /* receive message of values*/
3534     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3535 
3536     /* insert into matrix */
3537     jj      = rstart;
3538     smycols = mycols;
3539     svals   = vals;
3540     for (i=0; i<m; i++) {
3541       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3542       smycols += ourlens[i];
3543       svals   += ourlens[i];
3544       jj++;
3545     }
3546   }
3547   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3548   ierr = PetscFree(vals);CHKERRQ(ierr);
3549   ierr = PetscFree(mycols);CHKERRQ(ierr);
3550   ierr = PetscFree(rowners);CHKERRQ(ierr);
3551   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3552   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3553   PetscFunctionReturn(0);
3554 }
3555 
3556 #undef __FUNCT__
3557 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3558 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3559 {
3560   PetscErrorCode ierr;
3561   IS             iscol_local;
3562   PetscInt       csize;
3563 
3564   PetscFunctionBegin;
3565   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3566   if (call == MAT_REUSE_MATRIX) {
3567     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3568     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3569   } else {
3570     PetscInt cbs;
3571     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3572     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3573     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3574   }
3575   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3576   if (call == MAT_INITIAL_MATRIX) {
3577     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3578     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3579   }
3580   PetscFunctionReturn(0);
3581 }
3582 
3583 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3584 #undef __FUNCT__
3585 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3586 /*
3587     Not great since it makes two copies of the submatrix, first an SeqAIJ
3588   in local and then by concatenating the local matrices the end result.
3589   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3590 
3591   Note: This requires a sequential iscol with all indices.
3592 */
3593 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3594 {
3595   PetscErrorCode ierr;
3596   PetscMPIInt    rank,size;
3597   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3598   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3599   PetscBool      allcolumns, colflag;
3600   Mat            M,Mreuse;
3601   MatScalar      *vwork,*aa;
3602   MPI_Comm       comm;
3603   Mat_SeqAIJ     *aij;
3604 
3605   PetscFunctionBegin;
3606   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3607   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3608   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3609 
3610   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3611   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3612   if (colflag && ncol == mat->cmap->N) {
3613     allcolumns = PETSC_TRUE;
3614   } else {
3615     allcolumns = PETSC_FALSE;
3616   }
3617   if (call ==  MAT_REUSE_MATRIX) {
3618     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3619     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3620     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3621   } else {
3622     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3623   }
3624 
3625   /*
3626       m - number of local rows
3627       n - number of columns (same on all processors)
3628       rstart - first row in new global matrix generated
3629   */
3630   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3631   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3632   if (call == MAT_INITIAL_MATRIX) {
3633     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3634     ii  = aij->i;
3635     jj  = aij->j;
3636 
3637     /*
3638         Determine the number of non-zeros in the diagonal and off-diagonal
3639         portions of the matrix in order to do correct preallocation
3640     */
3641 
3642     /* first get start and end of "diagonal" columns */
3643     if (csize == PETSC_DECIDE) {
3644       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3645       if (mglobal == n) { /* square matrix */
3646         nlocal = m;
3647       } else {
3648         nlocal = n/size + ((n % size) > rank);
3649       }
3650     } else {
3651       nlocal = csize;
3652     }
3653     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3654     rstart = rend - nlocal;
3655     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3656 
3657     /* next, compute all the lengths */
3658     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3659     olens = dlens + m;
3660     for (i=0; i<m; i++) {
3661       jend = ii[i+1] - ii[i];
3662       olen = 0;
3663       dlen = 0;
3664       for (j=0; j<jend; j++) {
3665         if (*jj < rstart || *jj >= rend) olen++;
3666         else dlen++;
3667         jj++;
3668       }
3669       olens[i] = olen;
3670       dlens[i] = dlen;
3671     }
3672     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3673     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3674     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3675     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3676     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3677     ierr = PetscFree(dlens);CHKERRQ(ierr);
3678   } else {
3679     PetscInt ml,nl;
3680 
3681     M    = *newmat;
3682     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3683     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3684     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3685     /*
3686          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3687        rather than the slower MatSetValues().
3688     */
3689     M->was_assembled = PETSC_TRUE;
3690     M->assembled     = PETSC_FALSE;
3691   }
3692   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3693   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3694   ii   = aij->i;
3695   jj   = aij->j;
3696   aa   = aij->a;
3697   for (i=0; i<m; i++) {
3698     row   = rstart + i;
3699     nz    = ii[i+1] - ii[i];
3700     cwork = jj;     jj += nz;
3701     vwork = aa;     aa += nz;
3702     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3703   }
3704 
3705   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3706   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3707   *newmat = M;
3708 
3709   /* save submatrix used in processor for next request */
3710   if (call ==  MAT_INITIAL_MATRIX) {
3711     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3712     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3713   }
3714   PetscFunctionReturn(0);
3715 }
3716 
3717 #undef __FUNCT__
3718 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3719 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3720 {
3721   PetscInt       m,cstart, cend,j,nnz,i,d;
3722   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3723   const PetscInt *JJ;
3724   PetscScalar    *values;
3725   PetscErrorCode ierr;
3726 
3727   PetscFunctionBegin;
3728   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3729 
3730   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3731   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3732   m      = B->rmap->n;
3733   cstart = B->cmap->rstart;
3734   cend   = B->cmap->rend;
3735   rstart = B->rmap->rstart;
3736 
3737   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3738 
3739 #if defined(PETSC_USE_DEBUGGING)
3740   for (i=0; i<m; i++) {
3741     nnz = Ii[i+1]- Ii[i];
3742     JJ  = J + Ii[i];
3743     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3744     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3745     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3746   }
3747 #endif
3748 
3749   for (i=0; i<m; i++) {
3750     nnz     = Ii[i+1]- Ii[i];
3751     JJ      = J + Ii[i];
3752     nnz_max = PetscMax(nnz_max,nnz);
3753     d       = 0;
3754     for (j=0; j<nnz; j++) {
3755       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3756     }
3757     d_nnz[i] = d;
3758     o_nnz[i] = nnz - d;
3759   }
3760   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3761   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3762 
3763   if (v) values = (PetscScalar*)v;
3764   else {
3765     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3766   }
3767 
3768   for (i=0; i<m; i++) {
3769     ii   = i + rstart;
3770     nnz  = Ii[i+1]- Ii[i];
3771     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3772   }
3773   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3774   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3775 
3776   if (!v) {
3777     ierr = PetscFree(values);CHKERRQ(ierr);
3778   }
3779   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3780   PetscFunctionReturn(0);
3781 }
3782 
3783 #undef __FUNCT__
3784 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3785 /*@
3786    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3787    (the default parallel PETSc format).
3788 
3789    Collective on MPI_Comm
3790 
3791    Input Parameters:
3792 +  B - the matrix
3793 .  i - the indices into j for the start of each local row (starts with zero)
3794 .  j - the column indices for each local row (starts with zero)
3795 -  v - optional values in the matrix
3796 
3797    Level: developer
3798 
3799    Notes:
3800        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3801      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3802      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3803 
3804        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3805 
3806        The format which is used for the sparse matrix input, is equivalent to a
3807     row-major ordering.. i.e for the following matrix, the input data expected is
3808     as shown:
3809 
3810         1 0 0
3811         2 0 3     P0
3812        -------
3813         4 5 6     P1
3814 
3815      Process0 [P0]: rows_owned=[0,1]
3816         i =  {0,1,3}  [size = nrow+1  = 2+1]
3817         j =  {0,0,2}  [size = nz = 6]
3818         v =  {1,2,3}  [size = nz = 6]
3819 
3820      Process1 [P1]: rows_owned=[2]
3821         i =  {0,3}    [size = nrow+1  = 1+1]
3822         j =  {0,1,2}  [size = nz = 6]
3823         v =  {4,5,6}  [size = nz = 6]
3824 
3825 .keywords: matrix, aij, compressed row, sparse, parallel
3826 
3827 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3828           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3829 @*/
3830 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3831 {
3832   PetscErrorCode ierr;
3833 
3834   PetscFunctionBegin;
3835   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 #undef __FUNCT__
3840 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3841 /*@C
3842    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3843    (the default parallel PETSc format).  For good matrix assembly performance
3844    the user should preallocate the matrix storage by setting the parameters
3845    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3846    performance can be increased by more than a factor of 50.
3847 
3848    Collective on MPI_Comm
3849 
3850    Input Parameters:
3851 +  B - the matrix
3852 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3853            (same value is used for all local rows)
3854 .  d_nnz - array containing the number of nonzeros in the various rows of the
3855            DIAGONAL portion of the local submatrix (possibly different for each row)
3856            or NULL, if d_nz is used to specify the nonzero structure.
3857            The size of this array is equal to the number of local rows, i.e 'm'.
3858            For matrices that will be factored, you must leave room for (and set)
3859            the diagonal entry even if it is zero.
3860 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3861            submatrix (same value is used for all local rows).
3862 -  o_nnz - array containing the number of nonzeros in the various rows of the
3863            OFF-DIAGONAL portion of the local submatrix (possibly different for
3864            each row) or NULL, if o_nz is used to specify the nonzero
3865            structure. The size of this array is equal to the number
3866            of local rows, i.e 'm'.
3867 
3868    If the *_nnz parameter is given then the *_nz parameter is ignored
3869 
3870    The AIJ format (also called the Yale sparse matrix format or
3871    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3872    storage.  The stored row and column indices begin with zero.
3873    See Users-Manual: ch_mat for details.
3874 
3875    The parallel matrix is partitioned such that the first m0 rows belong to
3876    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3877    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3878 
3879    The DIAGONAL portion of the local submatrix of a processor can be defined
3880    as the submatrix which is obtained by extraction the part corresponding to
3881    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3882    first row that belongs to the processor, r2 is the last row belonging to
3883    the this processor, and c1-c2 is range of indices of the local part of a
3884    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3885    common case of a square matrix, the row and column ranges are the same and
3886    the DIAGONAL part is also square. The remaining portion of the local
3887    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3888 
3889    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3890 
3891    You can call MatGetInfo() to get information on how effective the preallocation was;
3892    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3893    You can also run with the option -info and look for messages with the string
3894    malloc in them to see if additional memory allocation was needed.
3895 
3896    Example usage:
3897 
3898    Consider the following 8x8 matrix with 34 non-zero values, that is
3899    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3900    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3901    as follows:
3902 
3903 .vb
3904             1  2  0  |  0  3  0  |  0  4
3905     Proc0   0  5  6  |  7  0  0  |  8  0
3906             9  0 10  | 11  0  0  | 12  0
3907     -------------------------------------
3908            13  0 14  | 15 16 17  |  0  0
3909     Proc1   0 18  0  | 19 20 21  |  0  0
3910             0  0  0  | 22 23  0  | 24  0
3911     -------------------------------------
3912     Proc2  25 26 27  |  0  0 28  | 29  0
3913            30  0  0  | 31 32 33  |  0 34
3914 .ve
3915 
3916    This can be represented as a collection of submatrices as:
3917 
3918 .vb
3919       A B C
3920       D E F
3921       G H I
3922 .ve
3923 
3924    Where the submatrices A,B,C are owned by proc0, D,E,F are
3925    owned by proc1, G,H,I are owned by proc2.
3926 
3927    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3928    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3929    The 'M','N' parameters are 8,8, and have the same values on all procs.
3930 
3931    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3932    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3933    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3934    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3935    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3936    matrix, ans [DF] as another SeqAIJ matrix.
3937 
3938    When d_nz, o_nz parameters are specified, d_nz storage elements are
3939    allocated for every row of the local diagonal submatrix, and o_nz
3940    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3941    One way to choose d_nz and o_nz is to use the max nonzerors per local
3942    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3943    In this case, the values of d_nz,o_nz are:
3944 .vb
3945      proc0 : dnz = 2, o_nz = 2
3946      proc1 : dnz = 3, o_nz = 2
3947      proc2 : dnz = 1, o_nz = 4
3948 .ve
3949    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3950    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3951    for proc3. i.e we are using 12+15+10=37 storage locations to store
3952    34 values.
3953 
3954    When d_nnz, o_nnz parameters are specified, the storage is specified
3955    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3956    In the above case the values for d_nnz,o_nnz are:
3957 .vb
3958      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3959      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3960      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3961 .ve
3962    Here the space allocated is sum of all the above values i.e 34, and
3963    hence pre-allocation is perfect.
3964 
3965    Level: intermediate
3966 
3967 .keywords: matrix, aij, compressed row, sparse, parallel
3968 
3969 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3970           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3971 @*/
3972 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3973 {
3974   PetscErrorCode ierr;
3975 
3976   PetscFunctionBegin;
3977   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3978   PetscValidType(B,1);
3979   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3980   PetscFunctionReturn(0);
3981 }
3982 
3983 #undef __FUNCT__
3984 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3985 /*@
3986      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3987          CSR format the local rows.
3988 
3989    Collective on MPI_Comm
3990 
3991    Input Parameters:
3992 +  comm - MPI communicator
3993 .  m - number of local rows (Cannot be PETSC_DECIDE)
3994 .  n - This value should be the same as the local size used in creating the
3995        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3996        calculated if N is given) For square matrices n is almost always m.
3997 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3998 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3999 .   i - row indices
4000 .   j - column indices
4001 -   a - matrix values
4002 
4003    Output Parameter:
4004 .   mat - the matrix
4005 
4006    Level: intermediate
4007 
4008    Notes:
4009        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4010      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4011      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4012 
4013        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4014 
4015        The format which is used for the sparse matrix input, is equivalent to a
4016     row-major ordering.. i.e for the following matrix, the input data expected is
4017     as shown:
4018 
4019         1 0 0
4020         2 0 3     P0
4021        -------
4022         4 5 6     P1
4023 
4024      Process0 [P0]: rows_owned=[0,1]
4025         i =  {0,1,3}  [size = nrow+1  = 2+1]
4026         j =  {0,0,2}  [size = nz = 6]
4027         v =  {1,2,3}  [size = nz = 6]
4028 
4029      Process1 [P1]: rows_owned=[2]
4030         i =  {0,3}    [size = nrow+1  = 1+1]
4031         j =  {0,1,2}  [size = nz = 6]
4032         v =  {4,5,6}  [size = nz = 6]
4033 
4034 .keywords: matrix, aij, compressed row, sparse, parallel
4035 
4036 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4037           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4038 @*/
4039 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4040 {
4041   PetscErrorCode ierr;
4042 
4043   PetscFunctionBegin;
4044   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4045   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4046   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4047   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4048   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4049   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4050   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4051   PetscFunctionReturn(0);
4052 }
4053 
4054 #undef __FUNCT__
4055 #define __FUNCT__ "MatCreateAIJ"
4056 /*@C
4057    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4058    (the default parallel PETSc format).  For good matrix assembly performance
4059    the user should preallocate the matrix storage by setting the parameters
4060    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4061    performance can be increased by more than a factor of 50.
4062 
4063    Collective on MPI_Comm
4064 
4065    Input Parameters:
4066 +  comm - MPI communicator
4067 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4068            This value should be the same as the local size used in creating the
4069            y vector for the matrix-vector product y = Ax.
4070 .  n - This value should be the same as the local size used in creating the
4071        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4072        calculated if N is given) For square matrices n is almost always m.
4073 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4074 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4075 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4076            (same value is used for all local rows)
4077 .  d_nnz - array containing the number of nonzeros in the various rows of the
4078            DIAGONAL portion of the local submatrix (possibly different for each row)
4079            or NULL, if d_nz is used to specify the nonzero structure.
4080            The size of this array is equal to the number of local rows, i.e 'm'.
4081 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4082            submatrix (same value is used for all local rows).
4083 -  o_nnz - array containing the number of nonzeros in the various rows of the
4084            OFF-DIAGONAL portion of the local submatrix (possibly different for
4085            each row) or NULL, if o_nz is used to specify the nonzero
4086            structure. The size of this array is equal to the number
4087            of local rows, i.e 'm'.
4088 
4089    Output Parameter:
4090 .  A - the matrix
4091 
4092    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4093    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4094    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4095 
4096    Notes:
4097    If the *_nnz parameter is given then the *_nz parameter is ignored
4098 
4099    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4100    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4101    storage requirements for this matrix.
4102 
4103    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4104    processor than it must be used on all processors that share the object for
4105    that argument.
4106 
4107    The user MUST specify either the local or global matrix dimensions
4108    (possibly both).
4109 
4110    The parallel matrix is partitioned across processors such that the
4111    first m0 rows belong to process 0, the next m1 rows belong to
4112    process 1, the next m2 rows belong to process 2 etc.. where
4113    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4114    values corresponding to [m x N] submatrix.
4115 
4116    The columns are logically partitioned with the n0 columns belonging
4117    to 0th partition, the next n1 columns belonging to the next
4118    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4119 
4120    The DIAGONAL portion of the local submatrix on any given processor
4121    is the submatrix corresponding to the rows and columns m,n
4122    corresponding to the given processor. i.e diagonal matrix on
4123    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4124    etc. The remaining portion of the local submatrix [m x (N-n)]
4125    constitute the OFF-DIAGONAL portion. The example below better
4126    illustrates this concept.
4127 
4128    For a square global matrix we define each processor's diagonal portion
4129    to be its local rows and the corresponding columns (a square submatrix);
4130    each processor's off-diagonal portion encompasses the remainder of the
4131    local matrix (a rectangular submatrix).
4132 
4133    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4134 
4135    When calling this routine with a single process communicator, a matrix of
4136    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4137    type of communicator, use the construction mechanism:
4138      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4139 
4140    By default, this format uses inodes (identical nodes) when possible.
4141    We search for consecutive rows with the same nonzero structure, thereby
4142    reusing matrix information to achieve increased efficiency.
4143 
4144    Options Database Keys:
4145 +  -mat_no_inode  - Do not use inodes
4146 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4147 -  -mat_aij_oneindex - Internally use indexing starting at 1
4148         rather than 0.  Note that when calling MatSetValues(),
4149         the user still MUST index entries starting at 0!
4150 
4151 
4152    Example usage:
4153 
4154    Consider the following 8x8 matrix with 34 non-zero values, that is
4155    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4156    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4157    as follows:
4158 
4159 .vb
4160             1  2  0  |  0  3  0  |  0  4
4161     Proc0   0  5  6  |  7  0  0  |  8  0
4162             9  0 10  | 11  0  0  | 12  0
4163     -------------------------------------
4164            13  0 14  | 15 16 17  |  0  0
4165     Proc1   0 18  0  | 19 20 21  |  0  0
4166             0  0  0  | 22 23  0  | 24  0
4167     -------------------------------------
4168     Proc2  25 26 27  |  0  0 28  | 29  0
4169            30  0  0  | 31 32 33  |  0 34
4170 .ve
4171 
4172    This can be represented as a collection of submatrices as:
4173 
4174 .vb
4175       A B C
4176       D E F
4177       G H I
4178 .ve
4179 
4180    Where the submatrices A,B,C are owned by proc0, D,E,F are
4181    owned by proc1, G,H,I are owned by proc2.
4182 
4183    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4184    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4185    The 'M','N' parameters are 8,8, and have the same values on all procs.
4186 
4187    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4188    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4189    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4190    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4191    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4192    matrix, ans [DF] as another SeqAIJ matrix.
4193 
4194    When d_nz, o_nz parameters are specified, d_nz storage elements are
4195    allocated for every row of the local diagonal submatrix, and o_nz
4196    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4197    One way to choose d_nz and o_nz is to use the max nonzerors per local
4198    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4199    In this case, the values of d_nz,o_nz are:
4200 .vb
4201      proc0 : dnz = 2, o_nz = 2
4202      proc1 : dnz = 3, o_nz = 2
4203      proc2 : dnz = 1, o_nz = 4
4204 .ve
4205    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4206    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4207    for proc3. i.e we are using 12+15+10=37 storage locations to store
4208    34 values.
4209 
4210    When d_nnz, o_nnz parameters are specified, the storage is specified
4211    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4212    In the above case the values for d_nnz,o_nnz are:
4213 .vb
4214      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4215      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4216      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4217 .ve
4218    Here the space allocated is sum of all the above values i.e 34, and
4219    hence pre-allocation is perfect.
4220 
4221    Level: intermediate
4222 
4223 .keywords: matrix, aij, compressed row, sparse, parallel
4224 
4225 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4226           MPIAIJ, MatCreateMPIAIJWithArrays()
4227 @*/
4228 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4229 {
4230   PetscErrorCode ierr;
4231   PetscMPIInt    size;
4232 
4233   PetscFunctionBegin;
4234   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4235   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4236   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4237   if (size > 1) {
4238     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4239     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4240   } else {
4241     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4242     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4243   }
4244   PetscFunctionReturn(0);
4245 }
4246 
4247 #undef __FUNCT__
4248 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4249 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4250 {
4251   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4252 
4253   PetscFunctionBegin;
4254   if (Ad)     *Ad     = a->A;
4255   if (Ao)     *Ao     = a->B;
4256   if (colmap) *colmap = a->garray;
4257   PetscFunctionReturn(0);
4258 }
4259 
4260 #undef __FUNCT__
4261 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4262 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4263 {
4264   PetscErrorCode ierr;
4265   PetscInt       i;
4266   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4267 
4268   PetscFunctionBegin;
4269   if (coloring->ctype == IS_COLORING_GLOBAL) {
4270     ISColoringValue *allcolors,*colors;
4271     ISColoring      ocoloring;
4272 
4273     /* set coloring for diagonal portion */
4274     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4275 
4276     /* set coloring for off-diagonal portion */
4277     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4278     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4279     for (i=0; i<a->B->cmap->n; i++) {
4280       colors[i] = allcolors[a->garray[i]];
4281     }
4282     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4283     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4284     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4285     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4286   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4287     ISColoringValue *colors;
4288     PetscInt        *larray;
4289     ISColoring      ocoloring;
4290 
4291     /* set coloring for diagonal portion */
4292     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4293     for (i=0; i<a->A->cmap->n; i++) {
4294       larray[i] = i + A->cmap->rstart;
4295     }
4296     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4297     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4298     for (i=0; i<a->A->cmap->n; i++) {
4299       colors[i] = coloring->colors[larray[i]];
4300     }
4301     ierr = PetscFree(larray);CHKERRQ(ierr);
4302     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4303     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4304     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4305 
4306     /* set coloring for off-diagonal portion */
4307     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4308     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4309     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4310     for (i=0; i<a->B->cmap->n; i++) {
4311       colors[i] = coloring->colors[larray[i]];
4312     }
4313     ierr = PetscFree(larray);CHKERRQ(ierr);
4314     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4315     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4316     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4317   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4318   PetscFunctionReturn(0);
4319 }
4320 
4321 #undef __FUNCT__
4322 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4323 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4324 {
4325   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4326   PetscErrorCode ierr;
4327 
4328   PetscFunctionBegin;
4329   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4330   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4331   PetscFunctionReturn(0);
4332 }
4333 
4334 #undef __FUNCT__
4335 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4336 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4337 {
4338   PetscErrorCode ierr;
4339   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4340   PetscInt       *indx;
4341 
4342   PetscFunctionBegin;
4343   /* This routine will ONLY return MPIAIJ type matrix */
4344   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4345   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4346   if (n == PETSC_DECIDE) {
4347     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4348   }
4349   /* Check sum(n) = N */
4350   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4351   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4352 
4353   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4354   rstart -= m;
4355 
4356   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4357   for (i=0; i<m; i++) {
4358     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4359     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4360     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4361   }
4362 
4363   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4364   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4365   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4366   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4367   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4368   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4369   PetscFunctionReturn(0);
4370 }
4371 
4372 #undef __FUNCT__
4373 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4374 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4375 {
4376   PetscErrorCode ierr;
4377   PetscInt       m,N,i,rstart,nnz,Ii;
4378   PetscInt       *indx;
4379   PetscScalar    *values;
4380 
4381   PetscFunctionBegin;
4382   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4383   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4384   for (i=0; i<m; i++) {
4385     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4386     Ii   = i + rstart;
4387     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4388     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4389   }
4390   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4391   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4392   PetscFunctionReturn(0);
4393 }
4394 
4395 #undef __FUNCT__
4396 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4397 /*@
4398       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4399                  matrices from each processor
4400 
4401     Collective on MPI_Comm
4402 
4403    Input Parameters:
4404 +    comm - the communicators the parallel matrix will live on
4405 .    inmat - the input sequential matrices
4406 .    n - number of local columns (or PETSC_DECIDE)
4407 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4408 
4409    Output Parameter:
4410 .    outmat - the parallel matrix generated
4411 
4412     Level: advanced
4413 
4414    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4415 
4416 @*/
4417 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4418 {
4419   PetscErrorCode ierr;
4420   PetscMPIInt    size;
4421 
4422   PetscFunctionBegin;
4423   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4424   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4425   if (size == 1) {
4426     if (scall == MAT_INITIAL_MATRIX) {
4427       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4428     } else {
4429       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4430     }
4431   } else {
4432     if (scall == MAT_INITIAL_MATRIX) {
4433       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4434     }
4435     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4436   }
4437   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4438   PetscFunctionReturn(0);
4439 }
4440 
4441 #undef __FUNCT__
4442 #define __FUNCT__ "MatFileSplit"
4443 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4444 {
4445   PetscErrorCode    ierr;
4446   PetscMPIInt       rank;
4447   PetscInt          m,N,i,rstart,nnz;
4448   size_t            len;
4449   const PetscInt    *indx;
4450   PetscViewer       out;
4451   char              *name;
4452   Mat               B;
4453   const PetscScalar *values;
4454 
4455   PetscFunctionBegin;
4456   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4457   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4458   /* Should this be the type of the diagonal block of A? */
4459   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4460   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4461   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4462   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4463   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4464   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4465   for (i=0; i<m; i++) {
4466     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4467     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4468     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4469   }
4470   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4471   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4472 
4473   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4474   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4475   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4476   sprintf(name,"%s.%d",outfile,rank);
4477   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4478   ierr = PetscFree(name);CHKERRQ(ierr);
4479   ierr = MatView(B,out);CHKERRQ(ierr);
4480   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4481   ierr = MatDestroy(&B);CHKERRQ(ierr);
4482   PetscFunctionReturn(0);
4483 }
4484 
4485 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4486 #undef __FUNCT__
4487 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4488 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4489 {
4490   PetscErrorCode      ierr;
4491   Mat_Merge_SeqsToMPI *merge;
4492   PetscContainer      container;
4493 
4494   PetscFunctionBegin;
4495   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4496   if (container) {
4497     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4498     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4499     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4500     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4501     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4502     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4503     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4504     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4505     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4506     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4507     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4508     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4509     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4510     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4511     ierr = PetscFree(merge);CHKERRQ(ierr);
4512     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4513   }
4514   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4515   PetscFunctionReturn(0);
4516 }
4517 
4518 #include <../src/mat/utils/freespace.h>
4519 #include <petscbt.h>
4520 
4521 #undef __FUNCT__
4522 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4523 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4524 {
4525   PetscErrorCode      ierr;
4526   MPI_Comm            comm;
4527   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4528   PetscMPIInt         size,rank,taga,*len_s;
4529   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4530   PetscInt            proc,m;
4531   PetscInt            **buf_ri,**buf_rj;
4532   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4533   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4534   MPI_Request         *s_waits,*r_waits;
4535   MPI_Status          *status;
4536   MatScalar           *aa=a->a;
4537   MatScalar           **abuf_r,*ba_i;
4538   Mat_Merge_SeqsToMPI *merge;
4539   PetscContainer      container;
4540 
4541   PetscFunctionBegin;
4542   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4543   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4544 
4545   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4546   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4547 
4548   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4549   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4550 
4551   bi     = merge->bi;
4552   bj     = merge->bj;
4553   buf_ri = merge->buf_ri;
4554   buf_rj = merge->buf_rj;
4555 
4556   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4557   owners = merge->rowmap->range;
4558   len_s  = merge->len_s;
4559 
4560   /* send and recv matrix values */
4561   /*-----------------------------*/
4562   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4563   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4564 
4565   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4566   for (proc=0,k=0; proc<size; proc++) {
4567     if (!len_s[proc]) continue;
4568     i    = owners[proc];
4569     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4570     k++;
4571   }
4572 
4573   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4574   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4575   ierr = PetscFree(status);CHKERRQ(ierr);
4576 
4577   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4578   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4579 
4580   /* insert mat values of mpimat */
4581   /*----------------------------*/
4582   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4583   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4584 
4585   for (k=0; k<merge->nrecv; k++) {
4586     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4587     nrows       = *(buf_ri_k[k]);
4588     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4589     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4590   }
4591 
4592   /* set values of ba */
4593   m = merge->rowmap->n;
4594   for (i=0; i<m; i++) {
4595     arow = owners[rank] + i;
4596     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4597     bnzi = bi[i+1] - bi[i];
4598     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4599 
4600     /* add local non-zero vals of this proc's seqmat into ba */
4601     anzi   = ai[arow+1] - ai[arow];
4602     aj     = a->j + ai[arow];
4603     aa     = a->a + ai[arow];
4604     nextaj = 0;
4605     for (j=0; nextaj<anzi; j++) {
4606       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4607         ba_i[j] += aa[nextaj++];
4608       }
4609     }
4610 
4611     /* add received vals into ba */
4612     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4613       /* i-th row */
4614       if (i == *nextrow[k]) {
4615         anzi   = *(nextai[k]+1) - *nextai[k];
4616         aj     = buf_rj[k] + *(nextai[k]);
4617         aa     = abuf_r[k] + *(nextai[k]);
4618         nextaj = 0;
4619         for (j=0; nextaj<anzi; j++) {
4620           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4621             ba_i[j] += aa[nextaj++];
4622           }
4623         }
4624         nextrow[k]++; nextai[k]++;
4625       }
4626     }
4627     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4628   }
4629   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4630   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4631 
4632   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4633   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4634   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4635   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4636   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4637   PetscFunctionReturn(0);
4638 }
4639 
4640 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4641 
4642 #undef __FUNCT__
4643 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4644 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4645 {
4646   PetscErrorCode      ierr;
4647   Mat                 B_mpi;
4648   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4649   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4650   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4651   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4652   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4653   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4654   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4655   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4656   MPI_Status          *status;
4657   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4658   PetscBT             lnkbt;
4659   Mat_Merge_SeqsToMPI *merge;
4660   PetscContainer      container;
4661 
4662   PetscFunctionBegin;
4663   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4664 
4665   /* make sure it is a PETSc comm */
4666   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4667   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4668   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4669 
4670   ierr = PetscNew(&merge);CHKERRQ(ierr);
4671   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4672 
4673   /* determine row ownership */
4674   /*---------------------------------------------------------*/
4675   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4676   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4677   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4678   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4679   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4680   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4681   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4682 
4683   m      = merge->rowmap->n;
4684   owners = merge->rowmap->range;
4685 
4686   /* determine the number of messages to send, their lengths */
4687   /*---------------------------------------------------------*/
4688   len_s = merge->len_s;
4689 
4690   len          = 0; /* length of buf_si[] */
4691   merge->nsend = 0;
4692   for (proc=0; proc<size; proc++) {
4693     len_si[proc] = 0;
4694     if (proc == rank) {
4695       len_s[proc] = 0;
4696     } else {
4697       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4698       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4699     }
4700     if (len_s[proc]) {
4701       merge->nsend++;
4702       nrows = 0;
4703       for (i=owners[proc]; i<owners[proc+1]; i++) {
4704         if (ai[i+1] > ai[i]) nrows++;
4705       }
4706       len_si[proc] = 2*(nrows+1);
4707       len         += len_si[proc];
4708     }
4709   }
4710 
4711   /* determine the number and length of messages to receive for ij-structure */
4712   /*-------------------------------------------------------------------------*/
4713   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4714   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4715 
4716   /* post the Irecv of j-structure */
4717   /*-------------------------------*/
4718   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4719   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4720 
4721   /* post the Isend of j-structure */
4722   /*--------------------------------*/
4723   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4724 
4725   for (proc=0, k=0; proc<size; proc++) {
4726     if (!len_s[proc]) continue;
4727     i    = owners[proc];
4728     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4729     k++;
4730   }
4731 
4732   /* receives and sends of j-structure are complete */
4733   /*------------------------------------------------*/
4734   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4735   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4736 
4737   /* send and recv i-structure */
4738   /*---------------------------*/
4739   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4740   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4741 
4742   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4743   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4744   for (proc=0,k=0; proc<size; proc++) {
4745     if (!len_s[proc]) continue;
4746     /* form outgoing message for i-structure:
4747          buf_si[0]:                 nrows to be sent
4748                [1:nrows]:           row index (global)
4749                [nrows+1:2*nrows+1]: i-structure index
4750     */
4751     /*-------------------------------------------*/
4752     nrows       = len_si[proc]/2 - 1;
4753     buf_si_i    = buf_si + nrows+1;
4754     buf_si[0]   = nrows;
4755     buf_si_i[0] = 0;
4756     nrows       = 0;
4757     for (i=owners[proc]; i<owners[proc+1]; i++) {
4758       anzi = ai[i+1] - ai[i];
4759       if (anzi) {
4760         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4761         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4762         nrows++;
4763       }
4764     }
4765     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4766     k++;
4767     buf_si += len_si[proc];
4768   }
4769 
4770   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4771   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4772 
4773   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4774   for (i=0; i<merge->nrecv; i++) {
4775     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4776   }
4777 
4778   ierr = PetscFree(len_si);CHKERRQ(ierr);
4779   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4780   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4781   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4782   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4783   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4784   ierr = PetscFree(status);CHKERRQ(ierr);
4785 
4786   /* compute a local seq matrix in each processor */
4787   /*----------------------------------------------*/
4788   /* allocate bi array and free space for accumulating nonzero column info */
4789   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4790   bi[0] = 0;
4791 
4792   /* create and initialize a linked list */
4793   nlnk = N+1;
4794   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4795 
4796   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4797   len  = ai[owners[rank+1]] - ai[owners[rank]];
4798   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4799 
4800   current_space = free_space;
4801 
4802   /* determine symbolic info for each local row */
4803   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4804 
4805   for (k=0; k<merge->nrecv; k++) {
4806     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4807     nrows       = *buf_ri_k[k];
4808     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4809     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4810   }
4811 
4812   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4813   len  = 0;
4814   for (i=0; i<m; i++) {
4815     bnzi = 0;
4816     /* add local non-zero cols of this proc's seqmat into lnk */
4817     arow  = owners[rank] + i;
4818     anzi  = ai[arow+1] - ai[arow];
4819     aj    = a->j + ai[arow];
4820     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4821     bnzi += nlnk;
4822     /* add received col data into lnk */
4823     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4824       if (i == *nextrow[k]) { /* i-th row */
4825         anzi  = *(nextai[k]+1) - *nextai[k];
4826         aj    = buf_rj[k] + *nextai[k];
4827         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4828         bnzi += nlnk;
4829         nextrow[k]++; nextai[k]++;
4830       }
4831     }
4832     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4833 
4834     /* if free space is not available, make more free space */
4835     if (current_space->local_remaining<bnzi) {
4836       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4837       nspacedouble++;
4838     }
4839     /* copy data into free space, then initialize lnk */
4840     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4841     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4842 
4843     current_space->array           += bnzi;
4844     current_space->local_used      += bnzi;
4845     current_space->local_remaining -= bnzi;
4846 
4847     bi[i+1] = bi[i] + bnzi;
4848   }
4849 
4850   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4851 
4852   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4853   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4854   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4855 
4856   /* create symbolic parallel matrix B_mpi */
4857   /*---------------------------------------*/
4858   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4859   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4860   if (n==PETSC_DECIDE) {
4861     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4862   } else {
4863     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4864   }
4865   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4866   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4867   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4868   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4869   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4870 
4871   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4872   B_mpi->assembled    = PETSC_FALSE;
4873   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4874   merge->bi           = bi;
4875   merge->bj           = bj;
4876   merge->buf_ri       = buf_ri;
4877   merge->buf_rj       = buf_rj;
4878   merge->coi          = NULL;
4879   merge->coj          = NULL;
4880   merge->owners_co    = NULL;
4881 
4882   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4883 
4884   /* attach the supporting struct to B_mpi for reuse */
4885   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4886   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4887   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4888   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4889   *mpimat = B_mpi;
4890 
4891   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4892   PetscFunctionReturn(0);
4893 }
4894 
4895 #undef __FUNCT__
4896 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4897 /*@C
4898       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4899                  matrices from each processor
4900 
4901     Collective on MPI_Comm
4902 
4903    Input Parameters:
4904 +    comm - the communicators the parallel matrix will live on
4905 .    seqmat - the input sequential matrices
4906 .    m - number of local rows (or PETSC_DECIDE)
4907 .    n - number of local columns (or PETSC_DECIDE)
4908 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4909 
4910    Output Parameter:
4911 .    mpimat - the parallel matrix generated
4912 
4913     Level: advanced
4914 
4915    Notes:
4916      The dimensions of the sequential matrix in each processor MUST be the same.
4917      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4918      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4919 @*/
4920 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4921 {
4922   PetscErrorCode ierr;
4923   PetscMPIInt    size;
4924 
4925   PetscFunctionBegin;
4926   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4927   if (size == 1) {
4928     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4929     if (scall == MAT_INITIAL_MATRIX) {
4930       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4931     } else {
4932       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4933     }
4934     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4935     PetscFunctionReturn(0);
4936   }
4937   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4938   if (scall == MAT_INITIAL_MATRIX) {
4939     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4940   }
4941   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4942   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4943   PetscFunctionReturn(0);
4944 }
4945 
4946 #undef __FUNCT__
4947 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4948 /*@
4949      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4950           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4951           with MatGetSize()
4952 
4953     Not Collective
4954 
4955    Input Parameters:
4956 +    A - the matrix
4957 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4958 
4959    Output Parameter:
4960 .    A_loc - the local sequential matrix generated
4961 
4962     Level: developer
4963 
4964 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4965 
4966 @*/
4967 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4968 {
4969   PetscErrorCode ierr;
4970   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4971   Mat_SeqAIJ     *mat,*a,*b;
4972   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4973   MatScalar      *aa,*ba,*cam;
4974   PetscScalar    *ca;
4975   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4976   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4977   PetscBool      match;
4978   MPI_Comm       comm;
4979   PetscMPIInt    size;
4980 
4981   PetscFunctionBegin;
4982   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4983   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4984   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4985   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4986   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4987 
4988   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4989   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4990   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4991   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4992   aa = a->a; ba = b->a;
4993   if (scall == MAT_INITIAL_MATRIX) {
4994     if (size == 1) {
4995       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4996       PetscFunctionReturn(0);
4997     }
4998 
4999     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
5000     ci[0] = 0;
5001     for (i=0; i<am; i++) {
5002       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5003     }
5004     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
5005     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
5006     k    = 0;
5007     for (i=0; i<am; i++) {
5008       ncols_o = bi[i+1] - bi[i];
5009       ncols_d = ai[i+1] - ai[i];
5010       /* off-diagonal portion of A */
5011       for (jo=0; jo<ncols_o; jo++) {
5012         col = cmap[*bj];
5013         if (col >= cstart) break;
5014         cj[k]   = col; bj++;
5015         ca[k++] = *ba++;
5016       }
5017       /* diagonal portion of A */
5018       for (j=0; j<ncols_d; j++) {
5019         cj[k]   = cstart + *aj++;
5020         ca[k++] = *aa++;
5021       }
5022       /* off-diagonal portion of A */
5023       for (j=jo; j<ncols_o; j++) {
5024         cj[k]   = cmap[*bj++];
5025         ca[k++] = *ba++;
5026       }
5027     }
5028     /* put together the new matrix */
5029     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5030     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5031     /* Since these are PETSc arrays, change flags to free them as necessary. */
5032     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5033     mat->free_a  = PETSC_TRUE;
5034     mat->free_ij = PETSC_TRUE;
5035     mat->nonew   = 0;
5036   } else if (scall == MAT_REUSE_MATRIX) {
5037     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5038     ci = mat->i; cj = mat->j; cam = mat->a;
5039     for (i=0; i<am; i++) {
5040       /* off-diagonal portion of A */
5041       ncols_o = bi[i+1] - bi[i];
5042       for (jo=0; jo<ncols_o; jo++) {
5043         col = cmap[*bj];
5044         if (col >= cstart) break;
5045         *cam++ = *ba++; bj++;
5046       }
5047       /* diagonal portion of A */
5048       ncols_d = ai[i+1] - ai[i];
5049       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5050       /* off-diagonal portion of A */
5051       for (j=jo; j<ncols_o; j++) {
5052         *cam++ = *ba++; bj++;
5053       }
5054     }
5055   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5056   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5057   PetscFunctionReturn(0);
5058 }
5059 
5060 #undef __FUNCT__
5061 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5062 /*@C
5063      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5064 
5065     Not Collective
5066 
5067    Input Parameters:
5068 +    A - the matrix
5069 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5070 -    row, col - index sets of rows and columns to extract (or NULL)
5071 
5072    Output Parameter:
5073 .    A_loc - the local sequential matrix generated
5074 
5075     Level: developer
5076 
5077 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5078 
5079 @*/
5080 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5081 {
5082   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5083   PetscErrorCode ierr;
5084   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5085   IS             isrowa,iscola;
5086   Mat            *aloc;
5087   PetscBool      match;
5088 
5089   PetscFunctionBegin;
5090   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5091   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5092   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5093   if (!row) {
5094     start = A->rmap->rstart; end = A->rmap->rend;
5095     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5096   } else {
5097     isrowa = *row;
5098   }
5099   if (!col) {
5100     start = A->cmap->rstart;
5101     cmap  = a->garray;
5102     nzA   = a->A->cmap->n;
5103     nzB   = a->B->cmap->n;
5104     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5105     ncols = 0;
5106     for (i=0; i<nzB; i++) {
5107       if (cmap[i] < start) idx[ncols++] = cmap[i];
5108       else break;
5109     }
5110     imark = i;
5111     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5112     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5113     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5114   } else {
5115     iscola = *col;
5116   }
5117   if (scall != MAT_INITIAL_MATRIX) {
5118     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5119     aloc[0] = *A_loc;
5120   }
5121   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5122   *A_loc = aloc[0];
5123   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5124   if (!row) {
5125     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5126   }
5127   if (!col) {
5128     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5129   }
5130   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5131   PetscFunctionReturn(0);
5132 }
5133 
5134 #undef __FUNCT__
5135 #define __FUNCT__ "MatGetBrowsOfAcols"
5136 /*@C
5137     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5138 
5139     Collective on Mat
5140 
5141    Input Parameters:
5142 +    A,B - the matrices in mpiaij format
5143 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5144 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5145 
5146    Output Parameter:
5147 +    rowb, colb - index sets of rows and columns of B to extract
5148 -    B_seq - the sequential matrix generated
5149 
5150     Level: developer
5151 
5152 @*/
5153 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5154 {
5155   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5156   PetscErrorCode ierr;
5157   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5158   IS             isrowb,iscolb;
5159   Mat            *bseq=NULL;
5160 
5161   PetscFunctionBegin;
5162   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5163     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5164   }
5165   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5166 
5167   if (scall == MAT_INITIAL_MATRIX) {
5168     start = A->cmap->rstart;
5169     cmap  = a->garray;
5170     nzA   = a->A->cmap->n;
5171     nzB   = a->B->cmap->n;
5172     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5173     ncols = 0;
5174     for (i=0; i<nzB; i++) {  /* row < local row index */
5175       if (cmap[i] < start) idx[ncols++] = cmap[i];
5176       else break;
5177     }
5178     imark = i;
5179     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5180     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5181     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5182     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5183   } else {
5184     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5185     isrowb  = *rowb; iscolb = *colb;
5186     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5187     bseq[0] = *B_seq;
5188   }
5189   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5190   *B_seq = bseq[0];
5191   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5192   if (!rowb) {
5193     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5194   } else {
5195     *rowb = isrowb;
5196   }
5197   if (!colb) {
5198     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5199   } else {
5200     *colb = iscolb;
5201   }
5202   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5203   PetscFunctionReturn(0);
5204 }
5205 
5206 #undef __FUNCT__
5207 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5208 /*
5209     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5210     of the OFF-DIAGONAL portion of local A
5211 
5212     Collective on Mat
5213 
5214    Input Parameters:
5215 +    A,B - the matrices in mpiaij format
5216 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5217 
5218    Output Parameter:
5219 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5220 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5221 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5222 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5223 
5224     Level: developer
5225 
5226 */
5227 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5228 {
5229   VecScatter_MPI_General *gen_to,*gen_from;
5230   PetscErrorCode         ierr;
5231   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5232   Mat_SeqAIJ             *b_oth;
5233   VecScatter             ctx =a->Mvctx;
5234   MPI_Comm               comm;
5235   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5236   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5237   PetscScalar            *rvalues,*svalues;
5238   MatScalar              *b_otha,*bufa,*bufA;
5239   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5240   MPI_Request            *rwaits = NULL,*swaits = NULL;
5241   MPI_Status             *sstatus,rstatus;
5242   PetscMPIInt            jj,size;
5243   PetscInt               *cols,sbs,rbs;
5244   PetscScalar            *vals;
5245 
5246   PetscFunctionBegin;
5247   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5248   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5249   if (size == 1) PetscFunctionReturn(0);
5250 
5251   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5252     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5253   }
5254   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5255   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5256 
5257   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5258   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5259   rvalues  = gen_from->values; /* holds the length of receiving row */
5260   svalues  = gen_to->values;   /* holds the length of sending row */
5261   nrecvs   = gen_from->n;
5262   nsends   = gen_to->n;
5263 
5264   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5265   srow    = gen_to->indices;    /* local row index to be sent */
5266   sstarts = gen_to->starts;
5267   sprocs  = gen_to->procs;
5268   sstatus = gen_to->sstatus;
5269   sbs     = gen_to->bs;
5270   rstarts = gen_from->starts;
5271   rprocs  = gen_from->procs;
5272   rbs     = gen_from->bs;
5273 
5274   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5275   if (scall == MAT_INITIAL_MATRIX) {
5276     /* i-array */
5277     /*---------*/
5278     /*  post receives */
5279     for (i=0; i<nrecvs; i++) {
5280       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5281       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5282       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5283     }
5284 
5285     /* pack the outgoing message */
5286     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5287 
5288     sstartsj[0] = 0;
5289     rstartsj[0] = 0;
5290     len         = 0; /* total length of j or a array to be sent */
5291     k           = 0;
5292     for (i=0; i<nsends; i++) {
5293       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5294       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5295       for (j=0; j<nrows; j++) {
5296         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5297         for (l=0; l<sbs; l++) {
5298           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5299 
5300           rowlen[j*sbs+l] = ncols;
5301 
5302           len += ncols;
5303           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5304         }
5305         k++;
5306       }
5307       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5308 
5309       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5310     }
5311     /* recvs and sends of i-array are completed */
5312     i = nrecvs;
5313     while (i--) {
5314       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5315     }
5316     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5317 
5318     /* allocate buffers for sending j and a arrays */
5319     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5320     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5321 
5322     /* create i-array of B_oth */
5323     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5324 
5325     b_othi[0] = 0;
5326     len       = 0; /* total length of j or a array to be received */
5327     k         = 0;
5328     for (i=0; i<nrecvs; i++) {
5329       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5330       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5331       for (j=0; j<nrows; j++) {
5332         b_othi[k+1] = b_othi[k] + rowlen[j];
5333         len        += rowlen[j]; k++;
5334       }
5335       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5336     }
5337 
5338     /* allocate space for j and a arrrays of B_oth */
5339     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5340     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5341 
5342     /* j-array */
5343     /*---------*/
5344     /*  post receives of j-array */
5345     for (i=0; i<nrecvs; i++) {
5346       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5347       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5348     }
5349 
5350     /* pack the outgoing message j-array */
5351     k = 0;
5352     for (i=0; i<nsends; i++) {
5353       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5354       bufJ  = bufj+sstartsj[i];
5355       for (j=0; j<nrows; j++) {
5356         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5357         for (ll=0; ll<sbs; ll++) {
5358           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5359           for (l=0; l<ncols; l++) {
5360             *bufJ++ = cols[l];
5361           }
5362           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5363         }
5364       }
5365       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5366     }
5367 
5368     /* recvs and sends of j-array are completed */
5369     i = nrecvs;
5370     while (i--) {
5371       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5372     }
5373     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5374   } else if (scall == MAT_REUSE_MATRIX) {
5375     sstartsj = *startsj_s;
5376     rstartsj = *startsj_r;
5377     bufa     = *bufa_ptr;
5378     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5379     b_otha   = b_oth->a;
5380   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5381 
5382   /* a-array */
5383   /*---------*/
5384   /*  post receives of a-array */
5385   for (i=0; i<nrecvs; i++) {
5386     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5387     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5388   }
5389 
5390   /* pack the outgoing message a-array */
5391   k = 0;
5392   for (i=0; i<nsends; i++) {
5393     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5394     bufA  = bufa+sstartsj[i];
5395     for (j=0; j<nrows; j++) {
5396       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5397       for (ll=0; ll<sbs; ll++) {
5398         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5399         for (l=0; l<ncols; l++) {
5400           *bufA++ = vals[l];
5401         }
5402         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5403       }
5404     }
5405     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5406   }
5407   /* recvs and sends of a-array are completed */
5408   i = nrecvs;
5409   while (i--) {
5410     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5411   }
5412   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5413   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5414 
5415   if (scall == MAT_INITIAL_MATRIX) {
5416     /* put together the new matrix */
5417     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5418 
5419     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5420     /* Since these are PETSc arrays, change flags to free them as necessary. */
5421     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5422     b_oth->free_a  = PETSC_TRUE;
5423     b_oth->free_ij = PETSC_TRUE;
5424     b_oth->nonew   = 0;
5425 
5426     ierr = PetscFree(bufj);CHKERRQ(ierr);
5427     if (!startsj_s || !bufa_ptr) {
5428       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5429       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5430     } else {
5431       *startsj_s = sstartsj;
5432       *startsj_r = rstartsj;
5433       *bufa_ptr  = bufa;
5434     }
5435   }
5436   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5437   PetscFunctionReturn(0);
5438 }
5439 
5440 #undef __FUNCT__
5441 #define __FUNCT__ "MatGetCommunicationStructs"
5442 /*@C
5443   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5444 
5445   Not Collective
5446 
5447   Input Parameters:
5448 . A - The matrix in mpiaij format
5449 
5450   Output Parameter:
5451 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5452 . colmap - A map from global column index to local index into lvec
5453 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5454 
5455   Level: developer
5456 
5457 @*/
5458 #if defined(PETSC_USE_CTABLE)
5459 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5460 #else
5461 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5462 #endif
5463 {
5464   Mat_MPIAIJ *a;
5465 
5466   PetscFunctionBegin;
5467   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5468   PetscValidPointer(lvec, 2);
5469   PetscValidPointer(colmap, 3);
5470   PetscValidPointer(multScatter, 4);
5471   a = (Mat_MPIAIJ*) A->data;
5472   if (lvec) *lvec = a->lvec;
5473   if (colmap) *colmap = a->colmap;
5474   if (multScatter) *multScatter = a->Mvctx;
5475   PetscFunctionReturn(0);
5476 }
5477 
5478 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5479 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5480 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5481 #if defined(PETSC_HAVE_ELEMENTAL)
5482 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5483 #endif
5484 
5485 #undef __FUNCT__
5486 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5487 /*
5488     Computes (B'*A')' since computing B*A directly is untenable
5489 
5490                n                       p                          p
5491         (              )       (              )         (                  )
5492       m (      A       )  *  n (       B      )   =   m (         C        )
5493         (              )       (              )         (                  )
5494 
5495 */
5496 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5497 {
5498   PetscErrorCode ierr;
5499   Mat            At,Bt,Ct;
5500 
5501   PetscFunctionBegin;
5502   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5503   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5504   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5505   ierr = MatDestroy(&At);CHKERRQ(ierr);
5506   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5507   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5508   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5509   PetscFunctionReturn(0);
5510 }
5511 
5512 #undef __FUNCT__
5513 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5514 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5515 {
5516   PetscErrorCode ierr;
5517   PetscInt       m=A->rmap->n,n=B->cmap->n;
5518   Mat            Cmat;
5519 
5520   PetscFunctionBegin;
5521   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5522   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5523   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5524   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5525   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5526   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5527   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5528   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5529 
5530   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5531 
5532   *C = Cmat;
5533   PetscFunctionReturn(0);
5534 }
5535 
5536 /* ----------------------------------------------------------------*/
5537 #undef __FUNCT__
5538 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5539 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5540 {
5541   PetscErrorCode ierr;
5542 
5543   PetscFunctionBegin;
5544   if (scall == MAT_INITIAL_MATRIX) {
5545     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5546     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5547     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5548   }
5549   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5550   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5551   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5552   PetscFunctionReturn(0);
5553 }
5554 
5555 #if defined(PETSC_HAVE_MUMPS)
5556 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5557 #endif
5558 #if defined(PETSC_HAVE_PASTIX)
5559 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5560 #endif
5561 #if defined(PETSC_HAVE_SUPERLU_DIST)
5562 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5563 #endif
5564 #if defined(PETSC_HAVE_CLIQUE)
5565 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5566 #endif
5567 
5568 /*MC
5569    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5570 
5571    Options Database Keys:
5572 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5573 
5574   Level: beginner
5575 
5576 .seealso: MatCreateAIJ()
5577 M*/
5578 
5579 #undef __FUNCT__
5580 #define __FUNCT__ "MatCreate_MPIAIJ"
5581 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5582 {
5583   Mat_MPIAIJ     *b;
5584   PetscErrorCode ierr;
5585   PetscMPIInt    size;
5586 
5587   PetscFunctionBegin;
5588   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5589 
5590   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5591   B->data       = (void*)b;
5592   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5593   B->assembled  = PETSC_FALSE;
5594   B->insertmode = NOT_SET_VALUES;
5595   b->size       = size;
5596 
5597   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5598 
5599   /* build cache for off array entries formed */
5600   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5601 
5602   b->donotstash  = PETSC_FALSE;
5603   b->colmap      = 0;
5604   b->garray      = 0;
5605   b->roworiented = PETSC_TRUE;
5606 
5607   /* stuff used for matrix vector multiply */
5608   b->lvec  = NULL;
5609   b->Mvctx = NULL;
5610 
5611   /* stuff for MatGetRow() */
5612   b->rowindices   = 0;
5613   b->rowvalues    = 0;
5614   b->getrowactive = PETSC_FALSE;
5615 
5616   /* flexible pointer used in CUSP/CUSPARSE classes */
5617   b->spptr = NULL;
5618 
5619 #if defined(PETSC_HAVE_MUMPS)
5620   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5621 #endif
5622 #if defined(PETSC_HAVE_PASTIX)
5623   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5624 #endif
5625 #if defined(PETSC_HAVE_SUPERLU_DIST)
5626   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5627 #endif
5628 #if defined(PETSC_HAVE_CLIQUE)
5629   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5630 #endif
5631   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5632   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5633   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5634   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5635   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5636   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5637   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5638   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5639   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5640   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5641 #if defined(PETSC_HAVE_ELEMENTAL)
5642   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5643 #endif
5644   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5645   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5646   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5647   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5648   PetscFunctionReturn(0);
5649 }
5650 
5651 #undef __FUNCT__
5652 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5653 /*@
5654      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5655          and "off-diagonal" part of the matrix in CSR format.
5656 
5657    Collective on MPI_Comm
5658 
5659    Input Parameters:
5660 +  comm - MPI communicator
5661 .  m - number of local rows (Cannot be PETSC_DECIDE)
5662 .  n - This value should be the same as the local size used in creating the
5663        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5664        calculated if N is given) For square matrices n is almost always m.
5665 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5666 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5667 .   i - row indices for "diagonal" portion of matrix
5668 .   j - column indices
5669 .   a - matrix values
5670 .   oi - row indices for "off-diagonal" portion of matrix
5671 .   oj - column indices
5672 -   oa - matrix values
5673 
5674    Output Parameter:
5675 .   mat - the matrix
5676 
5677    Level: advanced
5678 
5679    Notes:
5680        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5681        must free the arrays once the matrix has been destroyed and not before.
5682 
5683        The i and j indices are 0 based
5684 
5685        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5686 
5687        This sets local rows and cannot be used to set off-processor values.
5688 
5689        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5690        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5691        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5692        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5693        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5694        communication if it is known that only local entries will be set.
5695 
5696 .keywords: matrix, aij, compressed row, sparse, parallel
5697 
5698 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5699           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5700 @*/
5701 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5702 {
5703   PetscErrorCode ierr;
5704   Mat_MPIAIJ     *maij;
5705 
5706   PetscFunctionBegin;
5707   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5708   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5709   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5710   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5711   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5712   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5713   maij = (Mat_MPIAIJ*) (*mat)->data;
5714 
5715   (*mat)->preallocated = PETSC_TRUE;
5716 
5717   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5718   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5719 
5720   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5721   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5722 
5723   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5724   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5725   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5726   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5727 
5728   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5729   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5730   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5731   PetscFunctionReturn(0);
5732 }
5733 
5734 /*
5735     Special version for direct calls from Fortran
5736 */
5737 #include <petsc-private/fortranimpl.h>
5738 
5739 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5740 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5741 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5742 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5743 #endif
5744 
5745 /* Change these macros so can be used in void function */
5746 #undef CHKERRQ
5747 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5748 #undef SETERRQ2
5749 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5750 #undef SETERRQ3
5751 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5752 #undef SETERRQ
5753 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5754 
5755 #undef __FUNCT__
5756 #define __FUNCT__ "matsetvaluesmpiaij_"
5757 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5758 {
5759   Mat            mat  = *mmat;
5760   PetscInt       m    = *mm, n = *mn;
5761   InsertMode     addv = *maddv;
5762   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5763   PetscScalar    value;
5764   PetscErrorCode ierr;
5765 
5766   MatCheckPreallocated(mat,1);
5767   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5768 
5769 #if defined(PETSC_USE_DEBUG)
5770   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5771 #endif
5772   {
5773     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5774     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5775     PetscBool roworiented = aij->roworiented;
5776 
5777     /* Some Variables required in the macro */
5778     Mat        A                 = aij->A;
5779     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5780     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5781     MatScalar  *aa               = a->a;
5782     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5783     Mat        B                 = aij->B;
5784     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5785     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5786     MatScalar  *ba               = b->a;
5787 
5788     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5789     PetscInt  nonew = a->nonew;
5790     MatScalar *ap1,*ap2;
5791 
5792     PetscFunctionBegin;
5793     for (i=0; i<m; i++) {
5794       if (im[i] < 0) continue;
5795 #if defined(PETSC_USE_DEBUG)
5796       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5797 #endif
5798       if (im[i] >= rstart && im[i] < rend) {
5799         row      = im[i] - rstart;
5800         lastcol1 = -1;
5801         rp1      = aj + ai[row];
5802         ap1      = aa + ai[row];
5803         rmax1    = aimax[row];
5804         nrow1    = ailen[row];
5805         low1     = 0;
5806         high1    = nrow1;
5807         lastcol2 = -1;
5808         rp2      = bj + bi[row];
5809         ap2      = ba + bi[row];
5810         rmax2    = bimax[row];
5811         nrow2    = bilen[row];
5812         low2     = 0;
5813         high2    = nrow2;
5814 
5815         for (j=0; j<n; j++) {
5816           if (roworiented) value = v[i*n+j];
5817           else value = v[i+j*m];
5818           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5819           if (in[j] >= cstart && in[j] < cend) {
5820             col = in[j] - cstart;
5821             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5822           } else if (in[j] < 0) continue;
5823 #if defined(PETSC_USE_DEBUG)
5824           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5825 #endif
5826           else {
5827             if (mat->was_assembled) {
5828               if (!aij->colmap) {
5829                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5830               }
5831 #if defined(PETSC_USE_CTABLE)
5832               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5833               col--;
5834 #else
5835               col = aij->colmap[in[j]] - 1;
5836 #endif
5837               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5838                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5839                 col  =  in[j];
5840                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5841                 B     = aij->B;
5842                 b     = (Mat_SeqAIJ*)B->data;
5843                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5844                 rp2   = bj + bi[row];
5845                 ap2   = ba + bi[row];
5846                 rmax2 = bimax[row];
5847                 nrow2 = bilen[row];
5848                 low2  = 0;
5849                 high2 = nrow2;
5850                 bm    = aij->B->rmap->n;
5851                 ba    = b->a;
5852               }
5853             } else col = in[j];
5854             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5855           }
5856         }
5857       } else if (!aij->donotstash) {
5858         if (roworiented) {
5859           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5860         } else {
5861           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5862         }
5863       }
5864     }
5865   }
5866   PetscFunctionReturnVoid();
5867 }
5868 
5869