xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision bcab024551cbaaaa7f49e357634a3584f91cb6d5)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
109 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
110 {
111   PetscErrorCode    ierr;
112   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
113 
114   PetscFunctionBegin;
115   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
116     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
117   } else {
118     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
119   }
120   PetscFunctionReturn(0);
121 }
122 
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
126 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
127 {
128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
129   PetscErrorCode ierr;
130   PetscInt       i,rstart,nrows,*rows;
131 
132   PetscFunctionBegin;
133   *zrows = NULL;
134   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
135   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
136   for (i=0; i<nrows; i++) rows[i] += rstart;
137   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
143 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
144 {
145   PetscErrorCode ierr;
146   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
147   PetscInt       i,n,*garray = aij->garray;
148   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
149   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
150   PetscReal      *work;
151 
152   PetscFunctionBegin;
153   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
154   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
155   if (type == NORM_2) {
156     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
157       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
158     }
159     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
160       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
161     }
162   } else if (type == NORM_1) {
163     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
164       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
165     }
166     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
167       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
168     }
169   } else if (type == NORM_INFINITY) {
170     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
171       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
172     }
173     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
174       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
175     }
176 
177   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
178   if (type == NORM_INFINITY) {
179     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
180   } else {
181     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
182   }
183   ierr = PetscFree(work);CHKERRQ(ierr);
184   if (type == NORM_2) {
185     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
186   }
187   PetscFunctionReturn(0);
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatDistribute_MPIAIJ"
192 /*
193     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
194     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
195 
196     Only for square matrices
197 
198     Used by a preconditioner, hence PETSC_EXTERN
199 */
200 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
201 {
202   PetscMPIInt    rank,size;
203   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
204   PetscErrorCode ierr;
205   Mat            mat;
206   Mat_SeqAIJ     *gmata;
207   PetscMPIInt    tag;
208   MPI_Status     status;
209   PetscBool      aij;
210   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
211 
212   PetscFunctionBegin;
213   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
214   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
215   if (!rank) {
216     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
217     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
218   }
219   if (reuse == MAT_INITIAL_MATRIX) {
220     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
221     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
222     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
223     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
224     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
225     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
226     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
227     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
228     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
229 
230     rowners[0] = 0;
231     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
232     rstart = rowners[rank];
233     rend   = rowners[rank+1];
234     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
235     if (!rank) {
236       gmata = (Mat_SeqAIJ*) gmat->data;
237       /* send row lengths to all processors */
238       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
239       for (i=1; i<size; i++) {
240         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242       /* determine number diagonal and off-diagonal counts */
243       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
244       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
245       jj   = 0;
246       for (i=0; i<m; i++) {
247         for (j=0; j<dlens[i]; j++) {
248           if (gmata->j[jj] < rstart) ld[i]++;
249           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
250           jj++;
251         }
252       }
253       /* send column indices to other processes */
254       for (i=1; i<size; i++) {
255         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
256         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
257         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
258       }
259 
260       /* send numerical values to other processes */
261       for (i=1; i<size; i++) {
262         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
263         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
264       }
265       gmataa = gmata->a;
266       gmataj = gmata->j;
267 
268     } else {
269       /* receive row lengths */
270       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
271       /* receive column indices */
272       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
273       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
274       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
275       /* determine number diagonal and off-diagonal counts */
276       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
277       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
278       jj   = 0;
279       for (i=0; i<m; i++) {
280         for (j=0; j<dlens[i]; j++) {
281           if (gmataj[jj] < rstart) ld[i]++;
282           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
283           jj++;
284         }
285       }
286       /* receive numerical values */
287       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
288       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
289     }
290     /* set preallocation */
291     for (i=0; i<m; i++) {
292       dlens[i] -= olens[i];
293     }
294     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
295     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
296 
297     for (i=0; i<m; i++) {
298       dlens[i] += olens[i];
299     }
300     cnt = 0;
301     for (i=0; i<m; i++) {
302       row  = rstart + i;
303       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
304       cnt += dlens[i];
305     }
306     if (rank) {
307       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
308     }
309     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
310     ierr = PetscFree(rowners);CHKERRQ(ierr);
311 
312     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
313 
314     *inmat = mat;
315   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
316     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
317     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
318     mat  = *inmat;
319     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
320     if (!rank) {
321       /* send numerical values to other processes */
322       gmata  = (Mat_SeqAIJ*) gmat->data;
323       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
324       gmataa = gmata->a;
325       for (i=1; i<size; i++) {
326         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
327         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
328       }
329       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
330     } else {
331       /* receive numerical values from process 0*/
332       nz   = Ad->nz + Ao->nz;
333       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
334       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
335     }
336     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
337     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
338     ad = Ad->a;
339     ao = Ao->a;
340     if (mat->rmap->n) {
341       i  = 0;
342       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
343       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
344     }
345     for (i=1; i<mat->rmap->n; i++) {
346       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
347       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
348     }
349     i--;
350     if (mat->rmap->n) {
351       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
352     }
353     if (rank) {
354       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
355     }
356   }
357   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
358   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
359   PetscFunctionReturn(0);
360 }
361 
362 /*
363   Local utility routine that creates a mapping from the global column
364 number to the local number in the off-diagonal part of the local
365 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
366 a slightly higher hash table cost; without it it is not scalable (each processor
367 has an order N integer array but is fast to acess.
368 */
369 #undef __FUNCT__
370 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
371 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
372 {
373   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
374   PetscErrorCode ierr;
375   PetscInt       n = aij->B->cmap->n,i;
376 
377   PetscFunctionBegin;
378   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
379 #if defined(PETSC_USE_CTABLE)
380   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
381   for (i=0; i<n; i++) {
382     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
383   }
384 #else
385   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
386   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
387   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
388 #endif
389   PetscFunctionReturn(0);
390 }
391 
392 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
393 { \
394     if (col <= lastcol1)  low1 = 0;     \
395     else                 high1 = nrow1; \
396     lastcol1 = col;\
397     while (high1-low1 > 5) { \
398       t = (low1+high1)/2; \
399       if (rp1[t] > col) high1 = t; \
400       else              low1  = t; \
401     } \
402       for (_i=low1; _i<high1; _i++) { \
403         if (rp1[_i] > col) break; \
404         if (rp1[_i] == col) { \
405           if (addv == ADD_VALUES) ap1[_i] += value;   \
406           else                    ap1[_i] = value; \
407           goto a_noinsert; \
408         } \
409       }  \
410       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
411       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
412       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
413       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
414       N = nrow1++ - 1; a->nz++; high1++; \
415       /* shift up all the later entries in this row */ \
416       for (ii=N; ii>=_i; ii--) { \
417         rp1[ii+1] = rp1[ii]; \
418         ap1[ii+1] = ap1[ii]; \
419       } \
420       rp1[_i] = col;  \
421       ap1[_i] = value;  \
422       A->nonzerostate++;\
423       a_noinsert: ; \
424       ailen[row] = nrow1; \
425 }
426 
427 
428 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
429   { \
430     if (col <= lastcol2) low2 = 0;                        \
431     else high2 = nrow2;                                   \
432     lastcol2 = col;                                       \
433     while (high2-low2 > 5) {                              \
434       t = (low2+high2)/2;                                 \
435       if (rp2[t] > col) high2 = t;                        \
436       else             low2  = t;                         \
437     }                                                     \
438     for (_i=low2; _i<high2; _i++) {                       \
439       if (rp2[_i] > col) break;                           \
440       if (rp2[_i] == col) {                               \
441         if (addv == ADD_VALUES) ap2[_i] += value;         \
442         else                    ap2[_i] = value;          \
443         goto b_noinsert;                                  \
444       }                                                   \
445     }                                                     \
446     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
447     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
448     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
449     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
450     N = nrow2++ - 1; b->nz++; high2++;                    \
451     /* shift up all the later entries in this row */      \
452     for (ii=N; ii>=_i; ii--) {                            \
453       rp2[ii+1] = rp2[ii];                                \
454       ap2[ii+1] = ap2[ii];                                \
455     }                                                     \
456     rp2[_i] = col;                                        \
457     ap2[_i] = value;                                      \
458     B->nonzerostate++;                                    \
459     b_noinsert: ;                                         \
460     bilen[row] = nrow2;                                   \
461   }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
465 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
466 {
467   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
468   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
469   PetscErrorCode ierr;
470   PetscInt       l,*garray = mat->garray,diag;
471 
472   PetscFunctionBegin;
473   /* code only works for square matrices A */
474 
475   /* find size of row to the left of the diagonal part */
476   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
477   row  = row - diag;
478   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
479     if (garray[b->j[b->i[row]+l]] > diag) break;
480   }
481   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
482 
483   /* diagonal part */
484   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
485 
486   /* right of diagonal part */
487   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatSetValues_MPIAIJ"
493 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
494 {
495   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
496   PetscScalar    value;
497   PetscErrorCode ierr;
498   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
499   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
500   PetscBool      roworiented = aij->roworiented;
501 
502   /* Some Variables required in the macro */
503   Mat        A                 = aij->A;
504   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
505   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
506   MatScalar  *aa               = a->a;
507   PetscBool  ignorezeroentries = a->ignorezeroentries;
508   Mat        B                 = aij->B;
509   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
510   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
511   MatScalar  *ba               = b->a;
512 
513   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
514   PetscInt  nonew;
515   MatScalar *ap1,*ap2;
516 
517   PetscFunctionBegin;
518   for (i=0; i<m; i++) {
519     if (im[i] < 0) continue;
520 #if defined(PETSC_USE_DEBUG)
521     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
522 #endif
523     if (im[i] >= rstart && im[i] < rend) {
524       row      = im[i] - rstart;
525       lastcol1 = -1;
526       rp1      = aj + ai[row];
527       ap1      = aa + ai[row];
528       rmax1    = aimax[row];
529       nrow1    = ailen[row];
530       low1     = 0;
531       high1    = nrow1;
532       lastcol2 = -1;
533       rp2      = bj + bi[row];
534       ap2      = ba + bi[row];
535       rmax2    = bimax[row];
536       nrow2    = bilen[row];
537       low2     = 0;
538       high2    = nrow2;
539 
540       for (j=0; j<n; j++) {
541         if (roworiented) value = v[i*n+j];
542         else             value = v[i+j*m];
543         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
544         if (in[j] >= cstart && in[j] < cend) {
545           col   = in[j] - cstart;
546           nonew = a->nonew;
547           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
548         } else if (in[j] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
551 #endif
552         else {
553           if (mat->was_assembled) {
554             if (!aij->colmap) {
555               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
556             }
557 #if defined(PETSC_USE_CTABLE)
558             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
559             col--;
560 #else
561             col = aij->colmap[in[j]] - 1;
562 #endif
563             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
564               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
565               col  =  in[j];
566               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
567               B     = aij->B;
568               b     = (Mat_SeqAIJ*)B->data;
569               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
570               rp2   = bj + bi[row];
571               ap2   = ba + bi[row];
572               rmax2 = bimax[row];
573               nrow2 = bilen[row];
574               low2  = 0;
575               high2 = nrow2;
576               bm    = aij->B->rmap->n;
577               ba    = b->a;
578             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
579           } else col = in[j];
580           nonew = b->nonew;
581           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
582         }
583       }
584     } else {
585       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
586       if (!aij->donotstash) {
587         mat->assembled = PETSC_FALSE;
588         if (roworiented) {
589           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
590         } else {
591           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
592         }
593       }
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatGetValues_MPIAIJ"
601 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
602 {
603   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
604   PetscErrorCode ierr;
605   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
606   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
607 
608   PetscFunctionBegin;
609   for (i=0; i<m; i++) {
610     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
611     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
612     if (idxm[i] >= rstart && idxm[i] < rend) {
613       row = idxm[i] - rstart;
614       for (j=0; j<n; j++) {
615         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
616         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
617         if (idxn[j] >= cstart && idxn[j] < cend) {
618           col  = idxn[j] - cstart;
619           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
620         } else {
621           if (!aij->colmap) {
622             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
623           }
624 #if defined(PETSC_USE_CTABLE)
625           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
626           col--;
627 #else
628           col = aij->colmap[idxn[j]] - 1;
629 #endif
630           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
631           else {
632             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
633           }
634         }
635       }
636     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
637   }
638   PetscFunctionReturn(0);
639 }
640 
641 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
645 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
646 {
647   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
648   PetscErrorCode ierr;
649   PetscInt       nstash,reallocs;
650   InsertMode     addv;
651 
652   PetscFunctionBegin;
653   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
654 
655   /* make sure all processors are either in INSERTMODE or ADDMODE */
656   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
657   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
658   mat->insertmode = addv; /* in case this processor had no cache */
659 
660   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
668 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
671   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
672   PetscErrorCode ierr;
673   PetscMPIInt    n;
674   PetscInt       i,j,rstart,ncols,flg;
675   PetscInt       *row,*col;
676   PetscBool      other_disassembled;
677   PetscScalar    *val;
678   InsertMode     addv = mat->insertmode;
679 
680   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
681 
682   PetscFunctionBegin;
683   if (!aij->donotstash && !mat->nooffprocentries) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       if (!flg) break;
687 
688       for (i=0; i<n; ) {
689         /* Now identify the consecutive vals belonging to the same row */
690         for (j=i,rstart=row[j]; j<n; j++) {
691           if (row[j] != rstart) break;
692         }
693         if (j < n) ncols = j-i;
694         else       ncols = n-i;
695         /* Now assemble all these values with a single function call */
696         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
697 
698         i = j;
699       }
700     }
701     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
702   }
703   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
704   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
705 
706   /* determine if any processor has disassembled, if so we must
707      also disassemble ourselfs, in order that we may reassemble. */
708   /*
709      if nonzero structure of submatrix B cannot change then we know that
710      no processor disassembled thus we can skip this stuff
711   */
712   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
713     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
714     if (mat->was_assembled && !other_disassembled) {
715       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
716     }
717   }
718   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
719     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
720   }
721   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
722   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
723   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
724 
725   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
726 
727   aij->rowvalues = 0;
728 
729   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
730   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
731 
732   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
733   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
734     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
735     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
742 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
743 {
744   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
745   PetscErrorCode ierr;
746 
747   PetscFunctionBegin;
748   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
749   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
750   PetscFunctionReturn(0);
751 }
752 
753 #undef __FUNCT__
754 #define __FUNCT__ "MatZeroRows_MPIAIJ"
755 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
756 {
757   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
758   PetscInt      *owners = A->rmap->range;
759   PetscInt       n      = A->rmap->n;
760   PetscSF        sf;
761   PetscInt      *lrows;
762   PetscSFNode   *rrows;
763   PetscInt       r, p = 0, len = 0;
764   PetscErrorCode ierr;
765 
766   PetscFunctionBegin;
767   /* Create SF where leaves are input rows and roots are owned rows */
768   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
769   for (r = 0; r < n; ++r) lrows[r] = -1;
770   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
771   for (r = 0; r < N; ++r) {
772     const PetscInt idx   = rows[r];
773     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
774     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
775       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
776     }
777     if (A->nooffproczerorows) {
778       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
779       lrows[len++] = idx - owners[p];
780     } else {
781       rrows[r].rank = p;
782       rrows[r].index = rows[r] - owners[p];
783     }
784   }
785   if (!A->nooffproczerorows) {
786     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
787     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
788     /* Collect flags for rows to be zeroed */
789     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
790     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
791     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
792     /* Compress and put in row numbers */
793     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
794   }
795   /* fix right hand side if needed */
796   if (x && b) {
797     const PetscScalar *xx;
798     PetscScalar       *bb;
799 
800     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
801     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
802     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
803     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
804     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
805   }
806   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
807   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
808   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
809     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
810   } else if (diag != 0.0) {
811     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
812     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
813     for (r = 0; r < len; ++r) {
814       const PetscInt row = lrows[r] + A->rmap->rstart;
815       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
816     }
817     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
818     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
819   } else {
820     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
821   }
822   ierr = PetscFree(lrows);CHKERRQ(ierr);
823 
824   /* only change matrix nonzero state if pattern was allowed to be changed */
825   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
826     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
827     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
828   }
829   PetscFunctionReturn(0);
830 }
831 
832 #undef __FUNCT__
833 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
834 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
835 {
836   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
837   PetscErrorCode    ierr;
838   PetscMPIInt       n = A->rmap->n;
839   PetscInt          i,j,r,m,p = 0,len = 0;
840   PetscInt          *lrows,*owners = A->rmap->range;
841   PetscSFNode       *rrows;
842   PetscSF           sf;
843   const PetscScalar *xx;
844   PetscScalar       *bb,*mask;
845   Vec               xmask,lmask;
846   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
847   const PetscInt    *aj, *ii,*ridx;
848   PetscScalar       *aa;
849 
850   PetscFunctionBegin;
851   /* Create SF where leaves are input rows and roots are owned rows */
852   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
853   for (r = 0; r < n; ++r) lrows[r] = -1;
854   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
855   for (r = 0; r < N; ++r) {
856     const PetscInt idx   = rows[r];
857     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
858     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
859       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
860     }
861     rrows[r].rank  = p;
862     rrows[r].index = rows[r] - owners[p];
863   }
864   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
865   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
866   /* Collect flags for rows to be zeroed */
867   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
868   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
869   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
870   /* Compress and put in row numbers */
871   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
872   /* zero diagonal part of matrix */
873   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
874   /* handle off diagonal part of matrix */
875   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
876   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
877   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
878   for (i=0; i<len; i++) bb[lrows[i]] = 1;
879   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
880   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
881   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
882   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
883   if (x) {
884     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
887     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
888   }
889   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
890   /* remove zeroed rows of off diagonal matrix */
891   ii = aij->i;
892   for (i=0; i<len; i++) {
893     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
894   }
895   /* loop over all elements of off process part of matrix zeroing removed columns*/
896   if (aij->compressedrow.use) {
897     m    = aij->compressedrow.nrows;
898     ii   = aij->compressedrow.i;
899     ridx = aij->compressedrow.rindex;
900     for (i=0; i<m; i++) {
901       n  = ii[i+1] - ii[i];
902       aj = aij->j + ii[i];
903       aa = aij->a + ii[i];
904 
905       for (j=0; j<n; j++) {
906         if (PetscAbsScalar(mask[*aj])) {
907           if (b) bb[*ridx] -= *aa*xx[*aj];
908           *aa = 0.0;
909         }
910         aa++;
911         aj++;
912       }
913       ridx++;
914     }
915   } else { /* do not use compressed row format */
916     m = l->B->rmap->n;
917     for (i=0; i<m; i++) {
918       n  = ii[i+1] - ii[i];
919       aj = aij->j + ii[i];
920       aa = aij->a + ii[i];
921       for (j=0; j<n; j++) {
922         if (PetscAbsScalar(mask[*aj])) {
923           if (b) bb[i] -= *aa*xx[*aj];
924           *aa = 0.0;
925         }
926         aa++;
927         aj++;
928       }
929     }
930   }
931   if (x) {
932     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
933     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
934   }
935   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
936   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
937   ierr = PetscFree(lrows);CHKERRQ(ierr);
938 
939   /* only change matrix nonzero state if pattern was allowed to be changed */
940   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
941     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
942     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
943   }
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatMult_MPIAIJ"
949 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
950 {
951   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
952   PetscErrorCode ierr;
953   PetscInt       nt;
954 
955   PetscFunctionBegin;
956   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
957   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
958   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
960   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
961   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
962   PetscFunctionReturn(0);
963 }
964 
965 #undef __FUNCT__
966 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
967 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
968 {
969   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
970   PetscErrorCode ierr;
971 
972   PetscFunctionBegin;
973   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
974   PetscFunctionReturn(0);
975 }
976 
977 #undef __FUNCT__
978 #define __FUNCT__ "MatMultAdd_MPIAIJ"
979 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
980 {
981   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
982   PetscErrorCode ierr;
983 
984   PetscFunctionBegin;
985   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
986   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
987   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
988   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
994 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
995 {
996   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
997   PetscErrorCode ierr;
998   PetscBool      merged;
999 
1000   PetscFunctionBegin;
1001   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1002   /* do nondiagonal part */
1003   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1004   if (!merged) {
1005     /* send it on its way */
1006     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1007     /* do local part */
1008     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1009     /* receive remote parts: note this assumes the values are not actually */
1010     /* added in yy until the next line, */
1011     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1012   } else {
1013     /* do local part */
1014     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1015     /* send it on its way */
1016     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1017     /* values actually were received in the Begin() but we need to call this nop */
1018     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1019   }
1020   PetscFunctionReturn(0);
1021 }
1022 
1023 #undef __FUNCT__
1024 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1025 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1026 {
1027   MPI_Comm       comm;
1028   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1029   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1030   IS             Me,Notme;
1031   PetscErrorCode ierr;
1032   PetscInt       M,N,first,last,*notme,i;
1033   PetscMPIInt    size;
1034 
1035   PetscFunctionBegin;
1036   /* Easy test: symmetric diagonal block */
1037   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1038   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1039   if (!*f) PetscFunctionReturn(0);
1040   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1041   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1042   if (size == 1) PetscFunctionReturn(0);
1043 
1044   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1045   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1046   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1047   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1048   for (i=0; i<first; i++) notme[i] = i;
1049   for (i=last; i<M; i++) notme[i-last+first] = i;
1050   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1051   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1052   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1053   Aoff = Aoffs[0];
1054   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1055   Boff = Boffs[0];
1056   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1057   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1058   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1059   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1060   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1061   ierr = PetscFree(notme);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1067 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1068 {
1069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1070   PetscErrorCode ierr;
1071 
1072   PetscFunctionBegin;
1073   /* do nondiagonal part */
1074   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1075   /* send it on its way */
1076   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1077   /* do local part */
1078   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1079   /* receive remote parts */
1080   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 /*
1085   This only works correctly for square matrices where the subblock A->A is the
1086    diagonal block
1087 */
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1090 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1091 {
1092   PetscErrorCode ierr;
1093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1094 
1095   PetscFunctionBegin;
1096   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1097   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1098   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatScale_MPIAIJ"
1104 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1105 {
1106   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1107   PetscErrorCode ierr;
1108 
1109   PetscFunctionBegin;
1110   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1111   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatDestroy_MPIAIJ"
1117 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1118 {
1119   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1120   PetscErrorCode ierr;
1121 
1122   PetscFunctionBegin;
1123 #if defined(PETSC_USE_LOG)
1124   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1125 #endif
1126   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1127   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1128   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1129   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1130 #if defined(PETSC_USE_CTABLE)
1131   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1132 #else
1133   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1134 #endif
1135   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1136   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1137   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1138   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1139   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1140   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1141 
1142   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1143   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1144   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1145   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1146   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1147   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1151 #if defined(PETSC_HAVE_ELEMENTAL)
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1153 #endif
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 #undef __FUNCT__
1158 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1159 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1160 {
1161   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1162   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1163   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1164   PetscErrorCode ierr;
1165   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1166   int            fd;
1167   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1168   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1169   PetscScalar    *column_values;
1170   PetscInt       message_count,flowcontrolcount;
1171   FILE           *file;
1172 
1173   PetscFunctionBegin;
1174   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1175   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1176   nz   = A->nz + B->nz;
1177   if (!rank) {
1178     header[0] = MAT_FILE_CLASSID;
1179     header[1] = mat->rmap->N;
1180     header[2] = mat->cmap->N;
1181 
1182     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1183     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1184     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1185     /* get largest number of rows any processor has */
1186     rlen  = mat->rmap->n;
1187     range = mat->rmap->range;
1188     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1189   } else {
1190     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1191     rlen = mat->rmap->n;
1192   }
1193 
1194   /* load up the local row counts */
1195   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1196   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1197 
1198   /* store the row lengths to the file */
1199   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1200   if (!rank) {
1201     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1202     for (i=1; i<size; i++) {
1203       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1204       rlen = range[i+1] - range[i];
1205       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1206       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1207     }
1208     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1209   } else {
1210     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1211     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1212     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1213   }
1214   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1215 
1216   /* load up the local column indices */
1217   nzmax = nz; /* th processor needs space a largest processor needs */
1218   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1220   cnt   = 0;
1221   for (i=0; i<mat->rmap->n; i++) {
1222     for (j=B->i[i]; j<B->i[i+1]; j++) {
1223       if ((col = garray[B->j[j]]) > cstart) break;
1224       column_indices[cnt++] = col;
1225     }
1226     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1227     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1228   }
1229   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1230 
1231   /* store the column indices to the file */
1232   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1233   if (!rank) {
1234     MPI_Status status;
1235     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1236     for (i=1; i<size; i++) {
1237       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1238       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1239       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1240       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1241       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     }
1243     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1244   } else {
1245     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1246     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1248     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1249   }
1250   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1251 
1252   /* load up the local column values */
1253   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1254   cnt  = 0;
1255   for (i=0; i<mat->rmap->n; i++) {
1256     for (j=B->i[i]; j<B->i[i+1]; j++) {
1257       if (garray[B->j[j]] > cstart) break;
1258       column_values[cnt++] = B->a[j];
1259     }
1260     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1261     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1262   }
1263   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1264 
1265   /* store the column values to the file */
1266   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1267   if (!rank) {
1268     MPI_Status status;
1269     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1270     for (i=1; i<size; i++) {
1271       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1272       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1273       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1274       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1275       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1276     }
1277     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1278   } else {
1279     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1280     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1281     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1283   }
1284   ierr = PetscFree(column_values);CHKERRQ(ierr);
1285 
1286   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1287   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1288   PetscFunctionReturn(0);
1289 }
1290 
1291 #include <petscdraw.h>
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1294 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1295 {
1296   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1297   PetscErrorCode    ierr;
1298   PetscMPIInt       rank = aij->rank,size = aij->size;
1299   PetscBool         isdraw,iascii,isbinary;
1300   PetscViewer       sviewer;
1301   PetscViewerFormat format;
1302 
1303   PetscFunctionBegin;
1304   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1305   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1306   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1307   if (iascii) {
1308     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1309     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1310       MatInfo   info;
1311       PetscBool inodes;
1312 
1313       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1314       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1315       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1316       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1317       if (!inodes) {
1318         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1319                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1320       } else {
1321         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1322                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1323       }
1324       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1325       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1326       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1327       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1328       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1329       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1330       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1331       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1332       PetscFunctionReturn(0);
1333     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1334       PetscInt inodecount,inodelimit,*inodes;
1335       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1336       if (inodes) {
1337         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1338       } else {
1339         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1340       }
1341       PetscFunctionReturn(0);
1342     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1343       PetscFunctionReturn(0);
1344     }
1345   } else if (isbinary) {
1346     if (size == 1) {
1347       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1348       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1349     } else {
1350       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1351     }
1352     PetscFunctionReturn(0);
1353   } else if (isdraw) {
1354     PetscDraw draw;
1355     PetscBool isnull;
1356     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1357     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1358   }
1359 
1360   {
1361     /* assemble the entire matrix onto first processor. */
1362     Mat        A;
1363     Mat_SeqAIJ *Aloc;
1364     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1365     MatScalar  *a;
1366 
1367     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1368     if (!rank) {
1369       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1370     } else {
1371       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1372     }
1373     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1374     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1375     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1376     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1377     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1378 
1379     /* copy over the A part */
1380     Aloc = (Mat_SeqAIJ*)aij->A->data;
1381     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1382     row  = mat->rmap->rstart;
1383     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1384     for (i=0; i<m; i++) {
1385       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1386       row++;
1387       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1388     }
1389     aj = Aloc->j;
1390     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1391 
1392     /* copy over the B part */
1393     Aloc = (Mat_SeqAIJ*)aij->B->data;
1394     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1395     row  = mat->rmap->rstart;
1396     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1397     ct   = cols;
1398     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1399     for (i=0; i<m; i++) {
1400       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1401       row++;
1402       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1403     }
1404     ierr = PetscFree(ct);CHKERRQ(ierr);
1405     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1406     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1407     /*
1408        Everyone has to call to draw the matrix since the graphics waits are
1409        synchronized across all processors that share the PetscDraw object
1410     */
1411     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1412     if (!rank) {
1413       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1414     }
1415     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1416     ierr = MatDestroy(&A);CHKERRQ(ierr);
1417   }
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatView_MPIAIJ"
1423 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1424 {
1425   PetscErrorCode ierr;
1426   PetscBool      iascii,isdraw,issocket,isbinary;
1427 
1428   PetscFunctionBegin;
1429   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1430   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1431   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1432   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1433   if (iascii || isdraw || isbinary || issocket) {
1434     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1435   }
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatSOR_MPIAIJ"
1441 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1442 {
1443   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1444   PetscErrorCode ierr;
1445   Vec            bb1 = 0;
1446   PetscBool      hasop;
1447 
1448   PetscFunctionBegin;
1449   if (flag == SOR_APPLY_UPPER) {
1450     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1451     PetscFunctionReturn(0);
1452   }
1453 
1454   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1455     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1456   }
1457 
1458   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1459     if (flag & SOR_ZERO_INITIAL_GUESS) {
1460       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1461       its--;
1462     }
1463 
1464     while (its--) {
1465       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1466       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1467 
1468       /* update rhs: bb1 = bb - B*x */
1469       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1470       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1471 
1472       /* local sweep */
1473       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1474     }
1475   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1476     if (flag & SOR_ZERO_INITIAL_GUESS) {
1477       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1478       its--;
1479     }
1480     while (its--) {
1481       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1482       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1483 
1484       /* update rhs: bb1 = bb - B*x */
1485       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1486       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1487 
1488       /* local sweep */
1489       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1490     }
1491   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1492     if (flag & SOR_ZERO_INITIAL_GUESS) {
1493       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1494       its--;
1495     }
1496     while (its--) {
1497       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1498       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1499 
1500       /* update rhs: bb1 = bb - B*x */
1501       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1502       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1503 
1504       /* local sweep */
1505       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1506     }
1507   } else if (flag & SOR_EISENSTAT) {
1508     Vec xx1;
1509 
1510     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1511     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1512 
1513     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1514     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1515     if (!mat->diag) {
1516       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1517       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1518     }
1519     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1520     if (hasop) {
1521       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1522     } else {
1523       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1524     }
1525     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1526 
1527     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1528 
1529     /* local sweep */
1530     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1531     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1532     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1533   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1534 
1535   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatPermute_MPIAIJ"
1541 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1542 {
1543   Mat            aA,aB,Aperm;
1544   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1545   PetscScalar    *aa,*ba;
1546   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1547   PetscSF        rowsf,sf;
1548   IS             parcolp = NULL;
1549   PetscBool      done;
1550   PetscErrorCode ierr;
1551 
1552   PetscFunctionBegin;
1553   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1554   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1555   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1556   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1557 
1558   /* Invert row permutation to find out where my rows should go */
1559   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1560   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1561   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1562   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1563   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1564   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1565 
1566   /* Invert column permutation to find out where my columns should go */
1567   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1568   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1569   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1570   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1571   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1572   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1573   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1574 
1575   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1576   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1577   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1578 
1579   /* Find out where my gcols should go */
1580   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1581   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1582   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1583   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1584   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1585   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1586   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1587   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1588 
1589   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1590   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1591   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1592   for (i=0; i<m; i++) {
1593     PetscInt row = rdest[i],rowner;
1594     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1595     for (j=ai[i]; j<ai[i+1]; j++) {
1596       PetscInt cowner,col = cdest[aj[j]];
1597       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1598       if (rowner == cowner) dnnz[i]++;
1599       else onnz[i]++;
1600     }
1601     for (j=bi[i]; j<bi[i+1]; j++) {
1602       PetscInt cowner,col = gcdest[bj[j]];
1603       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1604       if (rowner == cowner) dnnz[i]++;
1605       else onnz[i]++;
1606     }
1607   }
1608   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1609   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1610   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1611   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1612   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1613 
1614   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1615   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1616   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1617   for (i=0; i<m; i++) {
1618     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1619     PetscInt j0,rowlen;
1620     rowlen = ai[i+1] - ai[i];
1621     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1622       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1623       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1624     }
1625     rowlen = bi[i+1] - bi[i];
1626     for (j0=j=0; j<rowlen; j0=j) {
1627       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1628       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1629     }
1630   }
1631   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1632   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1633   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1634   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1635   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1636   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1637   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1638   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1639   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1640   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1641   *B = Aperm;
1642   PetscFunctionReturn(0);
1643 }
1644 
1645 #undef __FUNCT__
1646 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1647 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1648 {
1649   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1650   Mat            A    = mat->A,B = mat->B;
1651   PetscErrorCode ierr;
1652   PetscReal      isend[5],irecv[5];
1653 
1654   PetscFunctionBegin;
1655   info->block_size = 1.0;
1656   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1657 
1658   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1659   isend[3] = info->memory;  isend[4] = info->mallocs;
1660 
1661   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1662 
1663   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1664   isend[3] += info->memory;  isend[4] += info->mallocs;
1665   if (flag == MAT_LOCAL) {
1666     info->nz_used      = isend[0];
1667     info->nz_allocated = isend[1];
1668     info->nz_unneeded  = isend[2];
1669     info->memory       = isend[3];
1670     info->mallocs      = isend[4];
1671   } else if (flag == MAT_GLOBAL_MAX) {
1672     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1673 
1674     info->nz_used      = irecv[0];
1675     info->nz_allocated = irecv[1];
1676     info->nz_unneeded  = irecv[2];
1677     info->memory       = irecv[3];
1678     info->mallocs      = irecv[4];
1679   } else if (flag == MAT_GLOBAL_SUM) {
1680     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1681 
1682     info->nz_used      = irecv[0];
1683     info->nz_allocated = irecv[1];
1684     info->nz_unneeded  = irecv[2];
1685     info->memory       = irecv[3];
1686     info->mallocs      = irecv[4];
1687   }
1688   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1689   info->fill_ratio_needed = 0;
1690   info->factor_mallocs    = 0;
1691   PetscFunctionReturn(0);
1692 }
1693 
1694 #undef __FUNCT__
1695 #define __FUNCT__ "MatSetOption_MPIAIJ"
1696 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1697 {
1698   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1699   PetscErrorCode ierr;
1700 
1701   PetscFunctionBegin;
1702   switch (op) {
1703   case MAT_NEW_NONZERO_LOCATIONS:
1704   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1705   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1706   case MAT_KEEP_NONZERO_PATTERN:
1707   case MAT_NEW_NONZERO_LOCATION_ERR:
1708   case MAT_USE_INODES:
1709   case MAT_IGNORE_ZERO_ENTRIES:
1710     MatCheckPreallocated(A,1);
1711     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1712     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1713     break;
1714   case MAT_ROW_ORIENTED:
1715     a->roworiented = flg;
1716 
1717     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1718     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1719     break;
1720   case MAT_NEW_DIAGONALS:
1721     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1722     break;
1723   case MAT_IGNORE_OFF_PROC_ENTRIES:
1724     a->donotstash = flg;
1725     break;
1726   case MAT_SPD:
1727     A->spd_set = PETSC_TRUE;
1728     A->spd     = flg;
1729     if (flg) {
1730       A->symmetric                  = PETSC_TRUE;
1731       A->structurally_symmetric     = PETSC_TRUE;
1732       A->symmetric_set              = PETSC_TRUE;
1733       A->structurally_symmetric_set = PETSC_TRUE;
1734     }
1735     break;
1736   case MAT_SYMMETRIC:
1737     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1738     break;
1739   case MAT_STRUCTURALLY_SYMMETRIC:
1740     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1741     break;
1742   case MAT_HERMITIAN:
1743     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1744     break;
1745   case MAT_SYMMETRY_ETERNAL:
1746     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1747     break;
1748   default:
1749     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1750   }
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatGetRow_MPIAIJ"
1756 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1757 {
1758   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1759   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1760   PetscErrorCode ierr;
1761   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1762   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1763   PetscInt       *cmap,*idx_p;
1764 
1765   PetscFunctionBegin;
1766   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1767   mat->getrowactive = PETSC_TRUE;
1768 
1769   if (!mat->rowvalues && (idx || v)) {
1770     /*
1771         allocate enough space to hold information from the longest row.
1772     */
1773     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1774     PetscInt   max = 1,tmp;
1775     for (i=0; i<matin->rmap->n; i++) {
1776       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1777       if (max < tmp) max = tmp;
1778     }
1779     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1780   }
1781 
1782   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1783   lrow = row - rstart;
1784 
1785   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1786   if (!v)   {pvA = 0; pvB = 0;}
1787   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1788   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1789   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1790   nztot = nzA + nzB;
1791 
1792   cmap = mat->garray;
1793   if (v  || idx) {
1794     if (nztot) {
1795       /* Sort by increasing column numbers, assuming A and B already sorted */
1796       PetscInt imark = -1;
1797       if (v) {
1798         *v = v_p = mat->rowvalues;
1799         for (i=0; i<nzB; i++) {
1800           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1801           else break;
1802         }
1803         imark = i;
1804         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1805         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1806       }
1807       if (idx) {
1808         *idx = idx_p = mat->rowindices;
1809         if (imark > -1) {
1810           for (i=0; i<imark; i++) {
1811             idx_p[i] = cmap[cworkB[i]];
1812           }
1813         } else {
1814           for (i=0; i<nzB; i++) {
1815             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1816             else break;
1817           }
1818           imark = i;
1819         }
1820         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1821         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1822       }
1823     } else {
1824       if (idx) *idx = 0;
1825       if (v)   *v   = 0;
1826     }
1827   }
1828   *nz  = nztot;
1829   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1830   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 #undef __FUNCT__
1835 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1836 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1837 {
1838   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1839 
1840   PetscFunctionBegin;
1841   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1842   aij->getrowactive = PETSC_FALSE;
1843   PetscFunctionReturn(0);
1844 }
1845 
1846 #undef __FUNCT__
1847 #define __FUNCT__ "MatNorm_MPIAIJ"
1848 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1849 {
1850   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1851   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1852   PetscErrorCode ierr;
1853   PetscInt       i,j,cstart = mat->cmap->rstart;
1854   PetscReal      sum = 0.0;
1855   MatScalar      *v;
1856 
1857   PetscFunctionBegin;
1858   if (aij->size == 1) {
1859     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1860   } else {
1861     if (type == NORM_FROBENIUS) {
1862       v = amat->a;
1863       for (i=0; i<amat->nz; i++) {
1864         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1865       }
1866       v = bmat->a;
1867       for (i=0; i<bmat->nz; i++) {
1868         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1869       }
1870       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1871       *norm = PetscSqrtReal(*norm);
1872     } else if (type == NORM_1) { /* max column norm */
1873       PetscReal *tmp,*tmp2;
1874       PetscInt  *jj,*garray = aij->garray;
1875       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1876       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1877       *norm = 0.0;
1878       v     = amat->a; jj = amat->j;
1879       for (j=0; j<amat->nz; j++) {
1880         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1881       }
1882       v = bmat->a; jj = bmat->j;
1883       for (j=0; j<bmat->nz; j++) {
1884         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1885       }
1886       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1887       for (j=0; j<mat->cmap->N; j++) {
1888         if (tmp2[j] > *norm) *norm = tmp2[j];
1889       }
1890       ierr = PetscFree(tmp);CHKERRQ(ierr);
1891       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1892     } else if (type == NORM_INFINITY) { /* max row norm */
1893       PetscReal ntemp = 0.0;
1894       for (j=0; j<aij->A->rmap->n; j++) {
1895         v   = amat->a + amat->i[j];
1896         sum = 0.0;
1897         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1898           sum += PetscAbsScalar(*v); v++;
1899         }
1900         v = bmat->a + bmat->i[j];
1901         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1902           sum += PetscAbsScalar(*v); v++;
1903         }
1904         if (sum > ntemp) ntemp = sum;
1905       }
1906       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1907     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1908   }
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 #undef __FUNCT__
1913 #define __FUNCT__ "MatTranspose_MPIAIJ"
1914 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1915 {
1916   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1917   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1918   PetscErrorCode ierr;
1919   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1920   PetscInt       cstart = A->cmap->rstart,ncol;
1921   Mat            B;
1922   MatScalar      *array;
1923 
1924   PetscFunctionBegin;
1925   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1926 
1927   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1928   ai = Aloc->i; aj = Aloc->j;
1929   bi = Bloc->i; bj = Bloc->j;
1930   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1931     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1932     PetscSFNode          *oloc;
1933     PETSC_UNUSED PetscSF sf;
1934 
1935     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1936     /* compute d_nnz for preallocation */
1937     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1938     for (i=0; i<ai[ma]; i++) {
1939       d_nnz[aj[i]]++;
1940       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1941     }
1942     /* compute local off-diagonal contributions */
1943     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1944     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1945     /* map those to global */
1946     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1947     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1948     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1949     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1950     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1951     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1952     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1953 
1954     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1955     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1956     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1957     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1958     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1959     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1960   } else {
1961     B    = *matout;
1962     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1963     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1964   }
1965 
1966   /* copy over the A part */
1967   array = Aloc->a;
1968   row   = A->rmap->rstart;
1969   for (i=0; i<ma; i++) {
1970     ncol = ai[i+1]-ai[i];
1971     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1972     row++;
1973     array += ncol; aj += ncol;
1974   }
1975   aj = Aloc->j;
1976   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1977 
1978   /* copy over the B part */
1979   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1980   array = Bloc->a;
1981   row   = A->rmap->rstart;
1982   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1983   cols_tmp = cols;
1984   for (i=0; i<mb; i++) {
1985     ncol = bi[i+1]-bi[i];
1986     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1987     row++;
1988     array += ncol; cols_tmp += ncol;
1989   }
1990   ierr = PetscFree(cols);CHKERRQ(ierr);
1991 
1992   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1993   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1994   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1995     *matout = B;
1996   } else {
1997     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1998   }
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 #undef __FUNCT__
2003 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2004 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2005 {
2006   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2007   Mat            a    = aij->A,b = aij->B;
2008   PetscErrorCode ierr;
2009   PetscInt       s1,s2,s3;
2010 
2011   PetscFunctionBegin;
2012   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2013   if (rr) {
2014     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2015     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2016     /* Overlap communication with computation. */
2017     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2018   }
2019   if (ll) {
2020     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2021     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2022     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2023   }
2024   /* scale  the diagonal block */
2025   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2026 
2027   if (rr) {
2028     /* Do a scatter end and then right scale the off-diagonal block */
2029     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2030     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2031   }
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2037 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2038 {
2039   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2040   PetscErrorCode ierr;
2041 
2042   PetscFunctionBegin;
2043   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2044   PetscFunctionReturn(0);
2045 }
2046 
2047 #undef __FUNCT__
2048 #define __FUNCT__ "MatEqual_MPIAIJ"
2049 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2050 {
2051   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2052   Mat            a,b,c,d;
2053   PetscBool      flg;
2054   PetscErrorCode ierr;
2055 
2056   PetscFunctionBegin;
2057   a = matA->A; b = matA->B;
2058   c = matB->A; d = matB->B;
2059 
2060   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2061   if (flg) {
2062     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2063   }
2064   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "MatCopy_MPIAIJ"
2070 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2071 {
2072   PetscErrorCode ierr;
2073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2074   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2075 
2076   PetscFunctionBegin;
2077   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2078   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2079     /* because of the column compression in the off-processor part of the matrix a->B,
2080        the number of columns in a->B and b->B may be different, hence we cannot call
2081        the MatCopy() directly on the two parts. If need be, we can provide a more
2082        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2083        then copying the submatrices */
2084     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2085   } else {
2086     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2087     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2088   }
2089   PetscFunctionReturn(0);
2090 }
2091 
2092 #undef __FUNCT__
2093 #define __FUNCT__ "MatSetUp_MPIAIJ"
2094 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2095 {
2096   PetscErrorCode ierr;
2097 
2098   PetscFunctionBegin;
2099   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2100   PetscFunctionReturn(0);
2101 }
2102 
2103 /*
2104    Computes the number of nonzeros per row needed for preallocation when X and Y
2105    have different nonzero structure.
2106 */
2107 #undef __FUNCT__
2108 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2109 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2110 {
2111   PetscInt       i,j,k,nzx,nzy;
2112 
2113   PetscFunctionBegin;
2114   /* Set the number of nonzeros in the new matrix */
2115   for (i=0; i<m; i++) {
2116     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2117     nzx = xi[i+1] - xi[i];
2118     nzy = yi[i+1] - yi[i];
2119     nnz[i] = 0;
2120     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2121       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2122       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2123       nnz[i]++;
2124     }
2125     for (; k<nzy; k++) nnz[i]++;
2126   }
2127   PetscFunctionReturn(0);
2128 }
2129 
2130 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2131 #undef __FUNCT__
2132 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2133 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2134 {
2135   PetscErrorCode ierr;
2136   PetscInt       m = Y->rmap->N;
2137   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2138   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2139 
2140   PetscFunctionBegin;
2141   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 #undef __FUNCT__
2146 #define __FUNCT__ "MatAXPY_MPIAIJ"
2147 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2148 {
2149   PetscErrorCode ierr;
2150   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2151   PetscBLASInt   bnz,one=1;
2152   Mat_SeqAIJ     *x,*y;
2153 
2154   PetscFunctionBegin;
2155   if (str == SAME_NONZERO_PATTERN) {
2156     PetscScalar alpha = a;
2157     x    = (Mat_SeqAIJ*)xx->A->data;
2158     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2159     y    = (Mat_SeqAIJ*)yy->A->data;
2160     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2161     x    = (Mat_SeqAIJ*)xx->B->data;
2162     y    = (Mat_SeqAIJ*)yy->B->data;
2163     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2164     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2165     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2166   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2167     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2168   } else {
2169     Mat      B;
2170     PetscInt *nnz_d,*nnz_o;
2171     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2172     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2173     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2174     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2175     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2176     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2177     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2178     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2179     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2180     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2181     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2182     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2183     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2184     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2185   }
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2190 
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatConjugate_MPIAIJ"
2193 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2194 {
2195 #if defined(PETSC_USE_COMPLEX)
2196   PetscErrorCode ierr;
2197   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2198 
2199   PetscFunctionBegin;
2200   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2201   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2202 #else
2203   PetscFunctionBegin;
2204 #endif
2205   PetscFunctionReturn(0);
2206 }
2207 
2208 #undef __FUNCT__
2209 #define __FUNCT__ "MatRealPart_MPIAIJ"
2210 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2211 {
2212   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2213   PetscErrorCode ierr;
2214 
2215   PetscFunctionBegin;
2216   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2217   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2223 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2224 {
2225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2226   PetscErrorCode ierr;
2227 
2228   PetscFunctionBegin;
2229   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2230   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2231   PetscFunctionReturn(0);
2232 }
2233 
2234 #if defined(PETSC_HAVE_PBGL)
2235 
2236 #include <boost/parallel/mpi/bsp_process_group.hpp>
2237 #include <boost/graph/distributed/ilu_default_graph.hpp>
2238 #include <boost/graph/distributed/ilu_0_block.hpp>
2239 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2240 #include <boost/graph/distributed/petsc/interface.hpp>
2241 #include <boost/multi_array.hpp>
2242 #include <boost/parallel/distributed_property_map->hpp>
2243 
2244 #undef __FUNCT__
2245 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2246 /*
2247   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2248 */
2249 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2250 {
2251   namespace petsc = boost::distributed::petsc;
2252 
2253   namespace graph_dist = boost::graph::distributed;
2254   using boost::graph::distributed::ilu_default::process_group_type;
2255   using boost::graph::ilu_permuted;
2256 
2257   PetscBool      row_identity, col_identity;
2258   PetscContainer c;
2259   PetscInt       m, n, M, N;
2260   PetscErrorCode ierr;
2261 
2262   PetscFunctionBegin;
2263   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2264   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2265   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2266   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2267 
2268   process_group_type pg;
2269   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2270   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2271   lgraph_type& level_graph = *lgraph_p;
2272   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2273 
2274   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2275   ilu_permuted(level_graph);
2276 
2277   /* put together the new matrix */
2278   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2279   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2280   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2281   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2282   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2283   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2284   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2285   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2286 
2287   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2288   ierr = PetscContainerSetPointer(c, lgraph_p);
2289   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2290   ierr = PetscContainerDestroy(&c);
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2296 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2297 {
2298   PetscFunctionBegin;
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 #undef __FUNCT__
2303 #define __FUNCT__ "MatSolve_MPIAIJ"
2304 /*
2305   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2306 */
2307 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2308 {
2309   namespace graph_dist = boost::graph::distributed;
2310 
2311   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2312   lgraph_type    *lgraph_p;
2313   PetscContainer c;
2314   PetscErrorCode ierr;
2315 
2316   PetscFunctionBegin;
2317   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2318   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2319   ierr = VecCopy(b, x);CHKERRQ(ierr);
2320 
2321   PetscScalar *array_x;
2322   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2323   PetscInt sx;
2324   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2325 
2326   PetscScalar *array_b;
2327   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2328   PetscInt sb;
2329   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2330 
2331   lgraph_type& level_graph = *lgraph_p;
2332   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2333 
2334   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2335   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2336   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2337 
2338   typedef boost::iterator_property_map<array_ref_type::iterator,
2339                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2340   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2341   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2342 
2343   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2344   PetscFunctionReturn(0);
2345 }
2346 #endif
2347 
2348 #undef __FUNCT__
2349 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2350 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2351 {
2352   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2353   PetscErrorCode ierr;
2354   PetscInt       i,*idxb = 0;
2355   PetscScalar    *va,*vb;
2356   Vec            vtmp;
2357 
2358   PetscFunctionBegin;
2359   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2360   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2361   if (idx) {
2362     for (i=0; i<A->rmap->n; i++) {
2363       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2364     }
2365   }
2366 
2367   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2368   if (idx) {
2369     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2370   }
2371   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2372   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2373 
2374   for (i=0; i<A->rmap->n; i++) {
2375     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2376       va[i] = vb[i];
2377       if (idx) idx[i] = a->garray[idxb[i]];
2378     }
2379   }
2380 
2381   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2382   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2383   ierr = PetscFree(idxb);CHKERRQ(ierr);
2384   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2390 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2391 {
2392   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2393   PetscErrorCode ierr;
2394   PetscInt       i,*idxb = 0;
2395   PetscScalar    *va,*vb;
2396   Vec            vtmp;
2397 
2398   PetscFunctionBegin;
2399   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2400   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2401   if (idx) {
2402     for (i=0; i<A->cmap->n; i++) {
2403       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2404     }
2405   }
2406 
2407   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2408   if (idx) {
2409     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2410   }
2411   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2412   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2413 
2414   for (i=0; i<A->rmap->n; i++) {
2415     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2416       va[i] = vb[i];
2417       if (idx) idx[i] = a->garray[idxb[i]];
2418     }
2419   }
2420 
2421   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2422   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2423   ierr = PetscFree(idxb);CHKERRQ(ierr);
2424   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2430 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2431 {
2432   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2433   PetscInt       n      = A->rmap->n;
2434   PetscInt       cstart = A->cmap->rstart;
2435   PetscInt       *cmap  = mat->garray;
2436   PetscInt       *diagIdx, *offdiagIdx;
2437   Vec            diagV, offdiagV;
2438   PetscScalar    *a, *diagA, *offdiagA;
2439   PetscInt       r;
2440   PetscErrorCode ierr;
2441 
2442   PetscFunctionBegin;
2443   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2444   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2445   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2446   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2447   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2448   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2449   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2450   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2451   for (r = 0; r < n; ++r) {
2452     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2453       a[r]   = diagA[r];
2454       idx[r] = cstart + diagIdx[r];
2455     } else {
2456       a[r]   = offdiagA[r];
2457       idx[r] = cmap[offdiagIdx[r]];
2458     }
2459   }
2460   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2461   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2462   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2463   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2464   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2465   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 #undef __FUNCT__
2470 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2471 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2472 {
2473   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2474   PetscInt       n      = A->rmap->n;
2475   PetscInt       cstart = A->cmap->rstart;
2476   PetscInt       *cmap  = mat->garray;
2477   PetscInt       *diagIdx, *offdiagIdx;
2478   Vec            diagV, offdiagV;
2479   PetscScalar    *a, *diagA, *offdiagA;
2480   PetscInt       r;
2481   PetscErrorCode ierr;
2482 
2483   PetscFunctionBegin;
2484   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2485   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2486   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2487   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2488   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2489   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2490   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2491   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2492   for (r = 0; r < n; ++r) {
2493     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2494       a[r]   = diagA[r];
2495       idx[r] = cstart + diagIdx[r];
2496     } else {
2497       a[r]   = offdiagA[r];
2498       idx[r] = cmap[offdiagIdx[r]];
2499     }
2500   }
2501   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2502   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2503   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2504   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2505   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2506   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2507   PetscFunctionReturn(0);
2508 }
2509 
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2512 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2513 {
2514   PetscErrorCode ierr;
2515   Mat            *dummy;
2516 
2517   PetscFunctionBegin;
2518   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2519   *newmat = *dummy;
2520   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 #undef __FUNCT__
2525 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2526 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2527 {
2528   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2529   PetscErrorCode ierr;
2530 
2531   PetscFunctionBegin;
2532   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2533   PetscFunctionReturn(0);
2534 }
2535 
2536 #undef __FUNCT__
2537 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2538 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2539 {
2540   PetscErrorCode ierr;
2541   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2542 
2543   PetscFunctionBegin;
2544   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2545   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2546   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2547   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2548   PetscFunctionReturn(0);
2549 }
2550 
2551 /* -------------------------------------------------------------------*/
2552 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2553                                        MatGetRow_MPIAIJ,
2554                                        MatRestoreRow_MPIAIJ,
2555                                        MatMult_MPIAIJ,
2556                                 /* 4*/ MatMultAdd_MPIAIJ,
2557                                        MatMultTranspose_MPIAIJ,
2558                                        MatMultTransposeAdd_MPIAIJ,
2559 #if defined(PETSC_HAVE_PBGL)
2560                                        MatSolve_MPIAIJ,
2561 #else
2562                                        0,
2563 #endif
2564                                        0,
2565                                        0,
2566                                 /*10*/ 0,
2567                                        0,
2568                                        0,
2569                                        MatSOR_MPIAIJ,
2570                                        MatTranspose_MPIAIJ,
2571                                 /*15*/ MatGetInfo_MPIAIJ,
2572                                        MatEqual_MPIAIJ,
2573                                        MatGetDiagonal_MPIAIJ,
2574                                        MatDiagonalScale_MPIAIJ,
2575                                        MatNorm_MPIAIJ,
2576                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2577                                        MatAssemblyEnd_MPIAIJ,
2578                                        MatSetOption_MPIAIJ,
2579                                        MatZeroEntries_MPIAIJ,
2580                                 /*24*/ MatZeroRows_MPIAIJ,
2581                                        0,
2582 #if defined(PETSC_HAVE_PBGL)
2583                                        0,
2584 #else
2585                                        0,
2586 #endif
2587                                        0,
2588                                        0,
2589                                 /*29*/ MatSetUp_MPIAIJ,
2590 #if defined(PETSC_HAVE_PBGL)
2591                                        0,
2592 #else
2593                                        0,
2594 #endif
2595                                        0,
2596                                        0,
2597                                        0,
2598                                 /*34*/ MatDuplicate_MPIAIJ,
2599                                        0,
2600                                        0,
2601                                        0,
2602                                        0,
2603                                 /*39*/ MatAXPY_MPIAIJ,
2604                                        MatGetSubMatrices_MPIAIJ,
2605                                        MatIncreaseOverlap_MPIAIJ,
2606                                        MatGetValues_MPIAIJ,
2607                                        MatCopy_MPIAIJ,
2608                                 /*44*/ MatGetRowMax_MPIAIJ,
2609                                        MatScale_MPIAIJ,
2610                                        0,
2611                                        MatDiagonalSet_MPIAIJ,
2612                                        MatZeroRowsColumns_MPIAIJ,
2613                                 /*49*/ MatSetRandom_MPIAIJ,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2619                                        0,
2620                                        MatSetUnfactored_MPIAIJ,
2621                                        MatPermute_MPIAIJ,
2622                                        0,
2623                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2624                                        MatDestroy_MPIAIJ,
2625                                        MatView_MPIAIJ,
2626                                        0,
2627                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2628                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2629                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2634                                        MatGetRowMinAbs_MPIAIJ,
2635                                        0,
2636                                        MatSetColoring_MPIAIJ,
2637                                        0,
2638                                        MatSetValuesAdifor_MPIAIJ,
2639                                 /*75*/ MatFDColoringApply_AIJ,
2640                                        0,
2641                                        0,
2642                                        0,
2643                                        MatFindZeroDiagonals_MPIAIJ,
2644                                 /*80*/ 0,
2645                                        0,
2646                                        0,
2647                                 /*83*/ MatLoad_MPIAIJ,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2654                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2655                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2656                                        MatPtAP_MPIAIJ_MPIAIJ,
2657                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2658                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                 /*99*/ 0,
2664                                        0,
2665                                        0,
2666                                        MatConjugate_MPIAIJ,
2667                                        0,
2668                                 /*104*/MatSetValuesRow_MPIAIJ,
2669                                        MatRealPart_MPIAIJ,
2670                                        MatImaginaryPart_MPIAIJ,
2671                                        0,
2672                                        0,
2673                                 /*109*/0,
2674                                        0,
2675                                        MatGetRowMin_MPIAIJ,
2676                                        0,
2677                                        0,
2678                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                 /*119*/0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                        MatGetMultiProcBlock_MPIAIJ,
2688                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2689                                        MatGetColumnNorms_MPIAIJ,
2690                                        MatInvertBlockDiagonal_MPIAIJ,
2691                                        0,
2692                                        MatGetSubMatricesParallel_MPIAIJ,
2693                                 /*129*/0,
2694                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2695                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2696                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2697                                        0,
2698                                 /*134*/0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                        0,
2703                                 /*139*/0,
2704                                        0,
2705                                        0,
2706                                        MatFDColoringSetUp_MPIXAIJ,
2707                                        0,
2708                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2709 };
2710 
2711 /* ----------------------------------------------------------------------------------------*/
2712 
2713 #undef __FUNCT__
2714 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2715 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2716 {
2717   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2718   PetscErrorCode ierr;
2719 
2720   PetscFunctionBegin;
2721   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2722   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2723   PetscFunctionReturn(0);
2724 }
2725 
2726 #undef __FUNCT__
2727 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2728 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2729 {
2730   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2731   PetscErrorCode ierr;
2732 
2733   PetscFunctionBegin;
2734   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2735   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2736   PetscFunctionReturn(0);
2737 }
2738 
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2741 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2742 {
2743   Mat_MPIAIJ     *b;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2748   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2749   b = (Mat_MPIAIJ*)B->data;
2750 
2751   if (!B->preallocated) {
2752     /* Explicitly create 2 MATSEQAIJ matrices. */
2753     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2754     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2755     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2756     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2757     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2758     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2759     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2760     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2761     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2762     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2763   }
2764 
2765   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2766   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2767   B->preallocated = PETSC_TRUE;
2768   PetscFunctionReturn(0);
2769 }
2770 
2771 #undef __FUNCT__
2772 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2773 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2774 {
2775   Mat            mat;
2776   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2777   PetscErrorCode ierr;
2778 
2779   PetscFunctionBegin;
2780   *newmat = 0;
2781   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2782   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2783   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2784   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2785   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2786   a       = (Mat_MPIAIJ*)mat->data;
2787 
2788   mat->factortype   = matin->factortype;
2789   mat->assembled    = PETSC_TRUE;
2790   mat->insertmode   = NOT_SET_VALUES;
2791   mat->preallocated = PETSC_TRUE;
2792 
2793   a->size         = oldmat->size;
2794   a->rank         = oldmat->rank;
2795   a->donotstash   = oldmat->donotstash;
2796   a->roworiented  = oldmat->roworiented;
2797   a->rowindices   = 0;
2798   a->rowvalues    = 0;
2799   a->getrowactive = PETSC_FALSE;
2800 
2801   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2802   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2803 
2804   if (oldmat->colmap) {
2805 #if defined(PETSC_USE_CTABLE)
2806     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2807 #else
2808     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
2809     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2810     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2811 #endif
2812   } else a->colmap = 0;
2813   if (oldmat->garray) {
2814     PetscInt len;
2815     len  = oldmat->B->cmap->n;
2816     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
2817     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2818     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2819   } else a->garray = 0;
2820 
2821   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2822   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2823   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2824   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2825   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2826   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2827   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2828   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2829   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2830   *newmat = mat;
2831   PetscFunctionReturn(0);
2832 }
2833 
2834 
2835 
2836 #undef __FUNCT__
2837 #define __FUNCT__ "MatLoad_MPIAIJ"
2838 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2839 {
2840   PetscScalar    *vals,*svals;
2841   MPI_Comm       comm;
2842   PetscErrorCode ierr;
2843   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2844   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2845   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2846   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2847   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2848   int            fd;
2849   PetscInt       bs = newMat->rmap->bs;
2850 
2851   PetscFunctionBegin;
2852   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2853   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2854   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2855   if (!rank) {
2856     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2857     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2858     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2859   }
2860 
2861   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
2862   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2863   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2864   if (bs < 0) bs = 1;
2865 
2866   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2867 
2868   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2869   M    = header[1]; N = header[2];
2870   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2871   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2872   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2873 
2874   /* If global sizes are set, check if they are consistent with that given in the file */
2875   if (sizesset) {
2876     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2877   }
2878   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2879   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2880 
2881   /* determine ownership of all (block) rows */
2882   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2883   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2884   else m = newMat->rmap->n; /* Set by user */
2885 
2886   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
2887   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2888 
2889   /* First process needs enough room for process with most rows */
2890   if (!rank) {
2891     mmax = rowners[1];
2892     for (i=2; i<=size; i++) {
2893       mmax = PetscMax(mmax, rowners[i]);
2894     }
2895   } else mmax = -1;             /* unused, but compilers complain */
2896 
2897   rowners[0] = 0;
2898   for (i=2; i<=size; i++) {
2899     rowners[i] += rowners[i-1];
2900   }
2901   rstart = rowners[rank];
2902   rend   = rowners[rank+1];
2903 
2904   /* distribute row lengths to all processors */
2905   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2906   if (!rank) {
2907     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2908     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2909     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2910     for (j=0; j<m; j++) {
2911       procsnz[0] += ourlens[j];
2912     }
2913     for (i=1; i<size; i++) {
2914       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2915       /* calculate the number of nonzeros on each processor */
2916       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2917         procsnz[i] += rowlengths[j];
2918       }
2919       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2920     }
2921     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2922   } else {
2923     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2924   }
2925 
2926   if (!rank) {
2927     /* determine max buffer needed and allocate it */
2928     maxnz = 0;
2929     for (i=0; i<size; i++) {
2930       maxnz = PetscMax(maxnz,procsnz[i]);
2931     }
2932     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2933 
2934     /* read in my part of the matrix column indices  */
2935     nz   = procsnz[0];
2936     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2937     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2938 
2939     /* read in every one elses and ship off */
2940     for (i=1; i<size; i++) {
2941       nz   = procsnz[i];
2942       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2943       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2944     }
2945     ierr = PetscFree(cols);CHKERRQ(ierr);
2946   } else {
2947     /* determine buffer space needed for message */
2948     nz = 0;
2949     for (i=0; i<m; i++) {
2950       nz += ourlens[i];
2951     }
2952     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2953 
2954     /* receive message of column indices*/
2955     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2956   }
2957 
2958   /* determine column ownership if matrix is not square */
2959   if (N != M) {
2960     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2961     else n = newMat->cmap->n;
2962     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2963     cstart = cend - n;
2964   } else {
2965     cstart = rstart;
2966     cend   = rend;
2967     n      = cend - cstart;
2968   }
2969 
2970   /* loop over local rows, determining number of off diagonal entries */
2971   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2972   jj   = 0;
2973   for (i=0; i<m; i++) {
2974     for (j=0; j<ourlens[i]; j++) {
2975       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2976       jj++;
2977     }
2978   }
2979 
2980   for (i=0; i<m; i++) {
2981     ourlens[i] -= offlens[i];
2982   }
2983   if (!sizesset) {
2984     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2985   }
2986 
2987   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2988 
2989   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2990 
2991   for (i=0; i<m; i++) {
2992     ourlens[i] += offlens[i];
2993   }
2994 
2995   if (!rank) {
2996     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
2997 
2998     /* read in my part of the matrix numerical values  */
2999     nz   = procsnz[0];
3000     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3001 
3002     /* insert into matrix */
3003     jj      = rstart;
3004     smycols = mycols;
3005     svals   = vals;
3006     for (i=0; i<m; i++) {
3007       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3008       smycols += ourlens[i];
3009       svals   += ourlens[i];
3010       jj++;
3011     }
3012 
3013     /* read in other processors and ship out */
3014     for (i=1; i<size; i++) {
3015       nz   = procsnz[i];
3016       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3017       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3018     }
3019     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3020   } else {
3021     /* receive numeric values */
3022     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3023 
3024     /* receive message of values*/
3025     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3026 
3027     /* insert into matrix */
3028     jj      = rstart;
3029     smycols = mycols;
3030     svals   = vals;
3031     for (i=0; i<m; i++) {
3032       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3033       smycols += ourlens[i];
3034       svals   += ourlens[i];
3035       jj++;
3036     }
3037   }
3038   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3039   ierr = PetscFree(vals);CHKERRQ(ierr);
3040   ierr = PetscFree(mycols);CHKERRQ(ierr);
3041   ierr = PetscFree(rowners);CHKERRQ(ierr);
3042   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3043   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3044   PetscFunctionReturn(0);
3045 }
3046 
3047 #undef __FUNCT__
3048 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3049 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3050 {
3051   PetscErrorCode ierr;
3052   IS             iscol_local;
3053   PetscInt       csize;
3054 
3055   PetscFunctionBegin;
3056   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3057   if (call == MAT_REUSE_MATRIX) {
3058     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3059     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3060   } else {
3061     PetscInt cbs;
3062     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3063     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3064     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3065   }
3066   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3067   if (call == MAT_INITIAL_MATRIX) {
3068     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3069     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3070   }
3071   PetscFunctionReturn(0);
3072 }
3073 
3074 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3075 #undef __FUNCT__
3076 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3077 /*
3078     Not great since it makes two copies of the submatrix, first an SeqAIJ
3079   in local and then by concatenating the local matrices the end result.
3080   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3081 
3082   Note: This requires a sequential iscol with all indices.
3083 */
3084 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3085 {
3086   PetscErrorCode ierr;
3087   PetscMPIInt    rank,size;
3088   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3089   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3090   PetscBool      allcolumns, colflag;
3091   Mat            M,Mreuse;
3092   MatScalar      *vwork,*aa;
3093   MPI_Comm       comm;
3094   Mat_SeqAIJ     *aij;
3095 
3096   PetscFunctionBegin;
3097   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3098   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3099   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3100 
3101   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3102   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3103   if (colflag && ncol == mat->cmap->N) {
3104     allcolumns = PETSC_TRUE;
3105   } else {
3106     allcolumns = PETSC_FALSE;
3107   }
3108   if (call ==  MAT_REUSE_MATRIX) {
3109     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3110     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3111     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3112   } else {
3113     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3114   }
3115 
3116   /*
3117       m - number of local rows
3118       n - number of columns (same on all processors)
3119       rstart - first row in new global matrix generated
3120   */
3121   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3122   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3123   if (call == MAT_INITIAL_MATRIX) {
3124     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3125     ii  = aij->i;
3126     jj  = aij->j;
3127 
3128     /*
3129         Determine the number of non-zeros in the diagonal and off-diagonal
3130         portions of the matrix in order to do correct preallocation
3131     */
3132 
3133     /* first get start and end of "diagonal" columns */
3134     if (csize == PETSC_DECIDE) {
3135       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3136       if (mglobal == n) { /* square matrix */
3137         nlocal = m;
3138       } else {
3139         nlocal = n/size + ((n % size) > rank);
3140       }
3141     } else {
3142       nlocal = csize;
3143     }
3144     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3145     rstart = rend - nlocal;
3146     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3147 
3148     /* next, compute all the lengths */
3149     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3150     olens = dlens + m;
3151     for (i=0; i<m; i++) {
3152       jend = ii[i+1] - ii[i];
3153       olen = 0;
3154       dlen = 0;
3155       for (j=0; j<jend; j++) {
3156         if (*jj < rstart || *jj >= rend) olen++;
3157         else dlen++;
3158         jj++;
3159       }
3160       olens[i] = olen;
3161       dlens[i] = dlen;
3162     }
3163     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3164     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3165     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3166     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3167     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3168     ierr = PetscFree(dlens);CHKERRQ(ierr);
3169   } else {
3170     PetscInt ml,nl;
3171 
3172     M    = *newmat;
3173     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3174     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3175     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3176     /*
3177          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3178        rather than the slower MatSetValues().
3179     */
3180     M->was_assembled = PETSC_TRUE;
3181     M->assembled     = PETSC_FALSE;
3182   }
3183   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3184   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3185   ii   = aij->i;
3186   jj   = aij->j;
3187   aa   = aij->a;
3188   for (i=0; i<m; i++) {
3189     row   = rstart + i;
3190     nz    = ii[i+1] - ii[i];
3191     cwork = jj;     jj += nz;
3192     vwork = aa;     aa += nz;
3193     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3194   }
3195 
3196   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3197   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3198   *newmat = M;
3199 
3200   /* save submatrix used in processor for next request */
3201   if (call ==  MAT_INITIAL_MATRIX) {
3202     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3203     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3204   }
3205   PetscFunctionReturn(0);
3206 }
3207 
3208 #undef __FUNCT__
3209 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3210 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3211 {
3212   PetscInt       m,cstart, cend,j,nnz,i,d;
3213   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3214   const PetscInt *JJ;
3215   PetscScalar    *values;
3216   PetscErrorCode ierr;
3217 
3218   PetscFunctionBegin;
3219   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3220 
3221   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3222   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3223   m      = B->rmap->n;
3224   cstart = B->cmap->rstart;
3225   cend   = B->cmap->rend;
3226   rstart = B->rmap->rstart;
3227 
3228   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3229 
3230 #if defined(PETSC_USE_DEBUGGING)
3231   for (i=0; i<m; i++) {
3232     nnz = Ii[i+1]- Ii[i];
3233     JJ  = J + Ii[i];
3234     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3235     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3236     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3237   }
3238 #endif
3239 
3240   for (i=0; i<m; i++) {
3241     nnz     = Ii[i+1]- Ii[i];
3242     JJ      = J + Ii[i];
3243     nnz_max = PetscMax(nnz_max,nnz);
3244     d       = 0;
3245     for (j=0; j<nnz; j++) {
3246       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3247     }
3248     d_nnz[i] = d;
3249     o_nnz[i] = nnz - d;
3250   }
3251   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3252   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3253 
3254   if (v) values = (PetscScalar*)v;
3255   else {
3256     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3257   }
3258 
3259   for (i=0; i<m; i++) {
3260     ii   = i + rstart;
3261     nnz  = Ii[i+1]- Ii[i];
3262     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3263   }
3264   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3265   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3266 
3267   if (!v) {
3268     ierr = PetscFree(values);CHKERRQ(ierr);
3269   }
3270   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3271   PetscFunctionReturn(0);
3272 }
3273 
3274 #undef __FUNCT__
3275 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3276 /*@
3277    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3278    (the default parallel PETSc format).
3279 
3280    Collective on MPI_Comm
3281 
3282    Input Parameters:
3283 +  B - the matrix
3284 .  i - the indices into j for the start of each local row (starts with zero)
3285 .  j - the column indices for each local row (starts with zero)
3286 -  v - optional values in the matrix
3287 
3288    Level: developer
3289 
3290    Notes:
3291        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3292      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3293      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3294 
3295        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3296 
3297        The format which is used for the sparse matrix input, is equivalent to a
3298     row-major ordering.. i.e for the following matrix, the input data expected is
3299     as shown:
3300 
3301         1 0 0
3302         2 0 3     P0
3303        -------
3304         4 5 6     P1
3305 
3306      Process0 [P0]: rows_owned=[0,1]
3307         i =  {0,1,3}  [size = nrow+1  = 2+1]
3308         j =  {0,0,2}  [size = nz = 6]
3309         v =  {1,2,3}  [size = nz = 6]
3310 
3311      Process1 [P1]: rows_owned=[2]
3312         i =  {0,3}    [size = nrow+1  = 1+1]
3313         j =  {0,1,2}  [size = nz = 6]
3314         v =  {4,5,6}  [size = nz = 6]
3315 
3316 .keywords: matrix, aij, compressed row, sparse, parallel
3317 
3318 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3319           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3320 @*/
3321 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3322 {
3323   PetscErrorCode ierr;
3324 
3325   PetscFunctionBegin;
3326   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3327   PetscFunctionReturn(0);
3328 }
3329 
3330 #undef __FUNCT__
3331 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3332 /*@C
3333    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3334    (the default parallel PETSc format).  For good matrix assembly performance
3335    the user should preallocate the matrix storage by setting the parameters
3336    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3337    performance can be increased by more than a factor of 50.
3338 
3339    Collective on MPI_Comm
3340 
3341    Input Parameters:
3342 +  B - the matrix
3343 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3344            (same value is used for all local rows)
3345 .  d_nnz - array containing the number of nonzeros in the various rows of the
3346            DIAGONAL portion of the local submatrix (possibly different for each row)
3347            or NULL, if d_nz is used to specify the nonzero structure.
3348            The size of this array is equal to the number of local rows, i.e 'm'.
3349            For matrices that will be factored, you must leave room for (and set)
3350            the diagonal entry even if it is zero.
3351 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3352            submatrix (same value is used for all local rows).
3353 -  o_nnz - array containing the number of nonzeros in the various rows of the
3354            OFF-DIAGONAL portion of the local submatrix (possibly different for
3355            each row) or NULL, if o_nz is used to specify the nonzero
3356            structure. The size of this array is equal to the number
3357            of local rows, i.e 'm'.
3358 
3359    If the *_nnz parameter is given then the *_nz parameter is ignored
3360 
3361    The AIJ format (also called the Yale sparse matrix format or
3362    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3363    storage.  The stored row and column indices begin with zero.
3364    See Users-Manual: ch_mat for details.
3365 
3366    The parallel matrix is partitioned such that the first m0 rows belong to
3367    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3368    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3369 
3370    The DIAGONAL portion of the local submatrix of a processor can be defined
3371    as the submatrix which is obtained by extraction the part corresponding to
3372    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3373    first row that belongs to the processor, r2 is the last row belonging to
3374    the this processor, and c1-c2 is range of indices of the local part of a
3375    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3376    common case of a square matrix, the row and column ranges are the same and
3377    the DIAGONAL part is also square. The remaining portion of the local
3378    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3379 
3380    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3381 
3382    You can call MatGetInfo() to get information on how effective the preallocation was;
3383    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3384    You can also run with the option -info and look for messages with the string
3385    malloc in them to see if additional memory allocation was needed.
3386 
3387    Example usage:
3388 
3389    Consider the following 8x8 matrix with 34 non-zero values, that is
3390    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3391    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3392    as follows:
3393 
3394 .vb
3395             1  2  0  |  0  3  0  |  0  4
3396     Proc0   0  5  6  |  7  0  0  |  8  0
3397             9  0 10  | 11  0  0  | 12  0
3398     -------------------------------------
3399            13  0 14  | 15 16 17  |  0  0
3400     Proc1   0 18  0  | 19 20 21  |  0  0
3401             0  0  0  | 22 23  0  | 24  0
3402     -------------------------------------
3403     Proc2  25 26 27  |  0  0 28  | 29  0
3404            30  0  0  | 31 32 33  |  0 34
3405 .ve
3406 
3407    This can be represented as a collection of submatrices as:
3408 
3409 .vb
3410       A B C
3411       D E F
3412       G H I
3413 .ve
3414 
3415    Where the submatrices A,B,C are owned by proc0, D,E,F are
3416    owned by proc1, G,H,I are owned by proc2.
3417 
3418    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3419    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3420    The 'M','N' parameters are 8,8, and have the same values on all procs.
3421 
3422    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3423    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3424    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3425    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3426    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3427    matrix, ans [DF] as another SeqAIJ matrix.
3428 
3429    When d_nz, o_nz parameters are specified, d_nz storage elements are
3430    allocated for every row of the local diagonal submatrix, and o_nz
3431    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3432    One way to choose d_nz and o_nz is to use the max nonzerors per local
3433    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3434    In this case, the values of d_nz,o_nz are:
3435 .vb
3436      proc0 : dnz = 2, o_nz = 2
3437      proc1 : dnz = 3, o_nz = 2
3438      proc2 : dnz = 1, o_nz = 4
3439 .ve
3440    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3441    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3442    for proc3. i.e we are using 12+15+10=37 storage locations to store
3443    34 values.
3444 
3445    When d_nnz, o_nnz parameters are specified, the storage is specified
3446    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3447    In the above case the values for d_nnz,o_nnz are:
3448 .vb
3449      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3450      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3451      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3452 .ve
3453    Here the space allocated is sum of all the above values i.e 34, and
3454    hence pre-allocation is perfect.
3455 
3456    Level: intermediate
3457 
3458 .keywords: matrix, aij, compressed row, sparse, parallel
3459 
3460 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3461           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3462 @*/
3463 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3464 {
3465   PetscErrorCode ierr;
3466 
3467   PetscFunctionBegin;
3468   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3469   PetscValidType(B,1);
3470   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3471   PetscFunctionReturn(0);
3472 }
3473 
3474 #undef __FUNCT__
3475 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3476 /*@
3477      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3478          CSR format the local rows.
3479 
3480    Collective on MPI_Comm
3481 
3482    Input Parameters:
3483 +  comm - MPI communicator
3484 .  m - number of local rows (Cannot be PETSC_DECIDE)
3485 .  n - This value should be the same as the local size used in creating the
3486        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3487        calculated if N is given) For square matrices n is almost always m.
3488 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3489 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3490 .   i - row indices
3491 .   j - column indices
3492 -   a - matrix values
3493 
3494    Output Parameter:
3495 .   mat - the matrix
3496 
3497    Level: intermediate
3498 
3499    Notes:
3500        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3501      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3502      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3503 
3504        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3505 
3506        The format which is used for the sparse matrix input, is equivalent to a
3507     row-major ordering.. i.e for the following matrix, the input data expected is
3508     as shown:
3509 
3510         1 0 0
3511         2 0 3     P0
3512        -------
3513         4 5 6     P1
3514 
3515      Process0 [P0]: rows_owned=[0,1]
3516         i =  {0,1,3}  [size = nrow+1  = 2+1]
3517         j =  {0,0,2}  [size = nz = 6]
3518         v =  {1,2,3}  [size = nz = 6]
3519 
3520      Process1 [P1]: rows_owned=[2]
3521         i =  {0,3}    [size = nrow+1  = 1+1]
3522         j =  {0,1,2}  [size = nz = 6]
3523         v =  {4,5,6}  [size = nz = 6]
3524 
3525 .keywords: matrix, aij, compressed row, sparse, parallel
3526 
3527 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3528           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3529 @*/
3530 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3531 {
3532   PetscErrorCode ierr;
3533 
3534   PetscFunctionBegin;
3535   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3536   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3537   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3538   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3539   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3540   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3541   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3542   PetscFunctionReturn(0);
3543 }
3544 
3545 #undef __FUNCT__
3546 #define __FUNCT__ "MatCreateAIJ"
3547 /*@C
3548    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3549    (the default parallel PETSc format).  For good matrix assembly performance
3550    the user should preallocate the matrix storage by setting the parameters
3551    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3552    performance can be increased by more than a factor of 50.
3553 
3554    Collective on MPI_Comm
3555 
3556    Input Parameters:
3557 +  comm - MPI communicator
3558 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3559            This value should be the same as the local size used in creating the
3560            y vector for the matrix-vector product y = Ax.
3561 .  n - This value should be the same as the local size used in creating the
3562        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3563        calculated if N is given) For square matrices n is almost always m.
3564 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3565 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3566 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3567            (same value is used for all local rows)
3568 .  d_nnz - array containing the number of nonzeros in the various rows of the
3569            DIAGONAL portion of the local submatrix (possibly different for each row)
3570            or NULL, if d_nz is used to specify the nonzero structure.
3571            The size of this array is equal to the number of local rows, i.e 'm'.
3572 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3573            submatrix (same value is used for all local rows).
3574 -  o_nnz - array containing the number of nonzeros in the various rows of the
3575            OFF-DIAGONAL portion of the local submatrix (possibly different for
3576            each row) or NULL, if o_nz is used to specify the nonzero
3577            structure. The size of this array is equal to the number
3578            of local rows, i.e 'm'.
3579 
3580    Output Parameter:
3581 .  A - the matrix
3582 
3583    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3584    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3585    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3586 
3587    Notes:
3588    If the *_nnz parameter is given then the *_nz parameter is ignored
3589 
3590    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3591    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3592    storage requirements for this matrix.
3593 
3594    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3595    processor than it must be used on all processors that share the object for
3596    that argument.
3597 
3598    The user MUST specify either the local or global matrix dimensions
3599    (possibly both).
3600 
3601    The parallel matrix is partitioned across processors such that the
3602    first m0 rows belong to process 0, the next m1 rows belong to
3603    process 1, the next m2 rows belong to process 2 etc.. where
3604    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3605    values corresponding to [m x N] submatrix.
3606 
3607    The columns are logically partitioned with the n0 columns belonging
3608    to 0th partition, the next n1 columns belonging to the next
3609    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3610 
3611    The DIAGONAL portion of the local submatrix on any given processor
3612    is the submatrix corresponding to the rows and columns m,n
3613    corresponding to the given processor. i.e diagonal matrix on
3614    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3615    etc. The remaining portion of the local submatrix [m x (N-n)]
3616    constitute the OFF-DIAGONAL portion. The example below better
3617    illustrates this concept.
3618 
3619    For a square global matrix we define each processor's diagonal portion
3620    to be its local rows and the corresponding columns (a square submatrix);
3621    each processor's off-diagonal portion encompasses the remainder of the
3622    local matrix (a rectangular submatrix).
3623 
3624    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3625 
3626    When calling this routine with a single process communicator, a matrix of
3627    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3628    type of communicator, use the construction mechanism:
3629      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3630 
3631    By default, this format uses inodes (identical nodes) when possible.
3632    We search for consecutive rows with the same nonzero structure, thereby
3633    reusing matrix information to achieve increased efficiency.
3634 
3635    Options Database Keys:
3636 +  -mat_no_inode  - Do not use inodes
3637 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3638 -  -mat_aij_oneindex - Internally use indexing starting at 1
3639         rather than 0.  Note that when calling MatSetValues(),
3640         the user still MUST index entries starting at 0!
3641 
3642 
3643    Example usage:
3644 
3645    Consider the following 8x8 matrix with 34 non-zero values, that is
3646    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3647    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3648    as follows:
3649 
3650 .vb
3651             1  2  0  |  0  3  0  |  0  4
3652     Proc0   0  5  6  |  7  0  0  |  8  0
3653             9  0 10  | 11  0  0  | 12  0
3654     -------------------------------------
3655            13  0 14  | 15 16 17  |  0  0
3656     Proc1   0 18  0  | 19 20 21  |  0  0
3657             0  0  0  | 22 23  0  | 24  0
3658     -------------------------------------
3659     Proc2  25 26 27  |  0  0 28  | 29  0
3660            30  0  0  | 31 32 33  |  0 34
3661 .ve
3662 
3663    This can be represented as a collection of submatrices as:
3664 
3665 .vb
3666       A B C
3667       D E F
3668       G H I
3669 .ve
3670 
3671    Where the submatrices A,B,C are owned by proc0, D,E,F are
3672    owned by proc1, G,H,I are owned by proc2.
3673 
3674    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3675    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3676    The 'M','N' parameters are 8,8, and have the same values on all procs.
3677 
3678    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3679    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3680    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3681    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3682    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3683    matrix, ans [DF] as another SeqAIJ matrix.
3684 
3685    When d_nz, o_nz parameters are specified, d_nz storage elements are
3686    allocated for every row of the local diagonal submatrix, and o_nz
3687    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3688    One way to choose d_nz and o_nz is to use the max nonzerors per local
3689    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3690    In this case, the values of d_nz,o_nz are:
3691 .vb
3692      proc0 : dnz = 2, o_nz = 2
3693      proc1 : dnz = 3, o_nz = 2
3694      proc2 : dnz = 1, o_nz = 4
3695 .ve
3696    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3697    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3698    for proc3. i.e we are using 12+15+10=37 storage locations to store
3699    34 values.
3700 
3701    When d_nnz, o_nnz parameters are specified, the storage is specified
3702    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3703    In the above case the values for d_nnz,o_nnz are:
3704 .vb
3705      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3706      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3707      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3708 .ve
3709    Here the space allocated is sum of all the above values i.e 34, and
3710    hence pre-allocation is perfect.
3711 
3712    Level: intermediate
3713 
3714 .keywords: matrix, aij, compressed row, sparse, parallel
3715 
3716 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3717           MPIAIJ, MatCreateMPIAIJWithArrays()
3718 @*/
3719 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3720 {
3721   PetscErrorCode ierr;
3722   PetscMPIInt    size;
3723 
3724   PetscFunctionBegin;
3725   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3726   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3727   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3728   if (size > 1) {
3729     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3730     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3731   } else {
3732     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3733     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3734   }
3735   PetscFunctionReturn(0);
3736 }
3737 
3738 #undef __FUNCT__
3739 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3740 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3741 {
3742   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3743 
3744   PetscFunctionBegin;
3745   if (Ad)     *Ad     = a->A;
3746   if (Ao)     *Ao     = a->B;
3747   if (colmap) *colmap = a->garray;
3748   PetscFunctionReturn(0);
3749 }
3750 
3751 #undef __FUNCT__
3752 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3753 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3754 {
3755   PetscErrorCode ierr;
3756   PetscInt       i;
3757   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3758 
3759   PetscFunctionBegin;
3760   if (coloring->ctype == IS_COLORING_GLOBAL) {
3761     ISColoringValue *allcolors,*colors;
3762     ISColoring      ocoloring;
3763 
3764     /* set coloring for diagonal portion */
3765     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3766 
3767     /* set coloring for off-diagonal portion */
3768     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3769     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
3770     for (i=0; i<a->B->cmap->n; i++) {
3771       colors[i] = allcolors[a->garray[i]];
3772     }
3773     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3774     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3775     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3776     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3777   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3778     ISColoringValue *colors;
3779     PetscInt        *larray;
3780     ISColoring      ocoloring;
3781 
3782     /* set coloring for diagonal portion */
3783     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
3784     for (i=0; i<a->A->cmap->n; i++) {
3785       larray[i] = i + A->cmap->rstart;
3786     }
3787     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3788     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
3789     for (i=0; i<a->A->cmap->n; i++) {
3790       colors[i] = coloring->colors[larray[i]];
3791     }
3792     ierr = PetscFree(larray);CHKERRQ(ierr);
3793     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3794     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3795     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3796 
3797     /* set coloring for off-diagonal portion */
3798     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
3799     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3800     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
3801     for (i=0; i<a->B->cmap->n; i++) {
3802       colors[i] = coloring->colors[larray[i]];
3803     }
3804     ierr = PetscFree(larray);CHKERRQ(ierr);
3805     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3806     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3807     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3808   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3809   PetscFunctionReturn(0);
3810 }
3811 
3812 #undef __FUNCT__
3813 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3814 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3815 {
3816   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3817   PetscErrorCode ierr;
3818 
3819   PetscFunctionBegin;
3820   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3821   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3822   PetscFunctionReturn(0);
3823 }
3824 
3825 #undef __FUNCT__
3826 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3827 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3828 {
3829   PetscErrorCode ierr;
3830   PetscInt       m,N,i,rstart,nnz,Ii;
3831   PetscInt       *indx;
3832   PetscScalar    *values;
3833 
3834   PetscFunctionBegin;
3835   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3836   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3837     PetscInt       *dnz,*onz,sum,bs,cbs;
3838 
3839     if (n == PETSC_DECIDE) {
3840       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3841     }
3842     /* Check sum(n) = N */
3843     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3844     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3845 
3846     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3847     rstart -= m;
3848 
3849     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3850     for (i=0; i<m; i++) {
3851       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3852       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3853       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3854     }
3855 
3856     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3857     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3858     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3859     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3860     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3861     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3862     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3863   }
3864 
3865   /* numeric phase */
3866   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3867   for (i=0; i<m; i++) {
3868     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3869     Ii   = i + rstart;
3870     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3871     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3872   }
3873   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3874   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3875   PetscFunctionReturn(0);
3876 }
3877 
3878 #undef __FUNCT__
3879 #define __FUNCT__ "MatFileSplit"
3880 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3881 {
3882   PetscErrorCode    ierr;
3883   PetscMPIInt       rank;
3884   PetscInt          m,N,i,rstart,nnz;
3885   size_t            len;
3886   const PetscInt    *indx;
3887   PetscViewer       out;
3888   char              *name;
3889   Mat               B;
3890   const PetscScalar *values;
3891 
3892   PetscFunctionBegin;
3893   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3894   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3895   /* Should this be the type of the diagonal block of A? */
3896   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3897   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3898   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3899   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3900   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3901   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3902   for (i=0; i<m; i++) {
3903     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3904     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3905     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3906   }
3907   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3908   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3909 
3910   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3911   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3912   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
3913   sprintf(name,"%s.%d",outfile,rank);
3914   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3915   ierr = PetscFree(name);CHKERRQ(ierr);
3916   ierr = MatView(B,out);CHKERRQ(ierr);
3917   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3918   ierr = MatDestroy(&B);CHKERRQ(ierr);
3919   PetscFunctionReturn(0);
3920 }
3921 
3922 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3923 #undef __FUNCT__
3924 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3925 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3926 {
3927   PetscErrorCode      ierr;
3928   Mat_Merge_SeqsToMPI *merge;
3929   PetscContainer      container;
3930 
3931   PetscFunctionBegin;
3932   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3933   if (container) {
3934     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3935     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3936     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3937     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3938     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3939     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3940     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3941     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3942     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3943     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3944     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3945     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3946     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3947     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3948     ierr = PetscFree(merge);CHKERRQ(ierr);
3949     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3950   }
3951   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3952   PetscFunctionReturn(0);
3953 }
3954 
3955 #include <../src/mat/utils/freespace.h>
3956 #include <petscbt.h>
3957 
3958 #undef __FUNCT__
3959 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3960 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3961 {
3962   PetscErrorCode      ierr;
3963   MPI_Comm            comm;
3964   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3965   PetscMPIInt         size,rank,taga,*len_s;
3966   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3967   PetscInt            proc,m;
3968   PetscInt            **buf_ri,**buf_rj;
3969   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3970   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3971   MPI_Request         *s_waits,*r_waits;
3972   MPI_Status          *status;
3973   MatScalar           *aa=a->a;
3974   MatScalar           **abuf_r,*ba_i;
3975   Mat_Merge_SeqsToMPI *merge;
3976   PetscContainer      container;
3977 
3978   PetscFunctionBegin;
3979   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3980   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3981 
3982   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3983   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3984 
3985   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3986   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3987 
3988   bi     = merge->bi;
3989   bj     = merge->bj;
3990   buf_ri = merge->buf_ri;
3991   buf_rj = merge->buf_rj;
3992 
3993   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3994   owners = merge->rowmap->range;
3995   len_s  = merge->len_s;
3996 
3997   /* send and recv matrix values */
3998   /*-----------------------------*/
3999   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4000   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4001 
4002   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4003   for (proc=0,k=0; proc<size; proc++) {
4004     if (!len_s[proc]) continue;
4005     i    = owners[proc];
4006     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4007     k++;
4008   }
4009 
4010   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4011   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4012   ierr = PetscFree(status);CHKERRQ(ierr);
4013 
4014   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4015   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4016 
4017   /* insert mat values of mpimat */
4018   /*----------------------------*/
4019   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4020   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4021 
4022   for (k=0; k<merge->nrecv; k++) {
4023     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4024     nrows       = *(buf_ri_k[k]);
4025     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4026     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4027   }
4028 
4029   /* set values of ba */
4030   m = merge->rowmap->n;
4031   for (i=0; i<m; i++) {
4032     arow = owners[rank] + i;
4033     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4034     bnzi = bi[i+1] - bi[i];
4035     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4036 
4037     /* add local non-zero vals of this proc's seqmat into ba */
4038     anzi   = ai[arow+1] - ai[arow];
4039     aj     = a->j + ai[arow];
4040     aa     = a->a + ai[arow];
4041     nextaj = 0;
4042     for (j=0; nextaj<anzi; j++) {
4043       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4044         ba_i[j] += aa[nextaj++];
4045       }
4046     }
4047 
4048     /* add received vals into ba */
4049     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4050       /* i-th row */
4051       if (i == *nextrow[k]) {
4052         anzi   = *(nextai[k]+1) - *nextai[k];
4053         aj     = buf_rj[k] + *(nextai[k]);
4054         aa     = abuf_r[k] + *(nextai[k]);
4055         nextaj = 0;
4056         for (j=0; nextaj<anzi; j++) {
4057           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4058             ba_i[j] += aa[nextaj++];
4059           }
4060         }
4061         nextrow[k]++; nextai[k]++;
4062       }
4063     }
4064     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4065   }
4066   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4067   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4068 
4069   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4070   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4071   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4072   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4073   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4074   PetscFunctionReturn(0);
4075 }
4076 
4077 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4078 
4079 #undef __FUNCT__
4080 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4081 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4082 {
4083   PetscErrorCode      ierr;
4084   Mat                 B_mpi;
4085   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4086   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4087   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4088   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4089   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4090   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4091   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4092   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4093   MPI_Status          *status;
4094   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4095   PetscBT             lnkbt;
4096   Mat_Merge_SeqsToMPI *merge;
4097   PetscContainer      container;
4098 
4099   PetscFunctionBegin;
4100   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4101 
4102   /* make sure it is a PETSc comm */
4103   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4104   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4105   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4106 
4107   ierr = PetscNew(&merge);CHKERRQ(ierr);
4108   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4109 
4110   /* determine row ownership */
4111   /*---------------------------------------------------------*/
4112   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4113   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4114   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4115   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4116   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4117   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4118   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4119 
4120   m      = merge->rowmap->n;
4121   owners = merge->rowmap->range;
4122 
4123   /* determine the number of messages to send, their lengths */
4124   /*---------------------------------------------------------*/
4125   len_s = merge->len_s;
4126 
4127   len          = 0; /* length of buf_si[] */
4128   merge->nsend = 0;
4129   for (proc=0; proc<size; proc++) {
4130     len_si[proc] = 0;
4131     if (proc == rank) {
4132       len_s[proc] = 0;
4133     } else {
4134       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4135       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4136     }
4137     if (len_s[proc]) {
4138       merge->nsend++;
4139       nrows = 0;
4140       for (i=owners[proc]; i<owners[proc+1]; i++) {
4141         if (ai[i+1] > ai[i]) nrows++;
4142       }
4143       len_si[proc] = 2*(nrows+1);
4144       len         += len_si[proc];
4145     }
4146   }
4147 
4148   /* determine the number and length of messages to receive for ij-structure */
4149   /*-------------------------------------------------------------------------*/
4150   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4151   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4152 
4153   /* post the Irecv of j-structure */
4154   /*-------------------------------*/
4155   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4156   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4157 
4158   /* post the Isend of j-structure */
4159   /*--------------------------------*/
4160   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4161 
4162   for (proc=0, k=0; proc<size; proc++) {
4163     if (!len_s[proc]) continue;
4164     i    = owners[proc];
4165     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4166     k++;
4167   }
4168 
4169   /* receives and sends of j-structure are complete */
4170   /*------------------------------------------------*/
4171   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4172   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4173 
4174   /* send and recv i-structure */
4175   /*---------------------------*/
4176   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4177   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4178 
4179   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4180   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4181   for (proc=0,k=0; proc<size; proc++) {
4182     if (!len_s[proc]) continue;
4183     /* form outgoing message for i-structure:
4184          buf_si[0]:                 nrows to be sent
4185                [1:nrows]:           row index (global)
4186                [nrows+1:2*nrows+1]: i-structure index
4187     */
4188     /*-------------------------------------------*/
4189     nrows       = len_si[proc]/2 - 1;
4190     buf_si_i    = buf_si + nrows+1;
4191     buf_si[0]   = nrows;
4192     buf_si_i[0] = 0;
4193     nrows       = 0;
4194     for (i=owners[proc]; i<owners[proc+1]; i++) {
4195       anzi = ai[i+1] - ai[i];
4196       if (anzi) {
4197         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4198         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4199         nrows++;
4200       }
4201     }
4202     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4203     k++;
4204     buf_si += len_si[proc];
4205   }
4206 
4207   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4208   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4209 
4210   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4211   for (i=0; i<merge->nrecv; i++) {
4212     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4213   }
4214 
4215   ierr = PetscFree(len_si);CHKERRQ(ierr);
4216   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4217   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4218   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4219   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4220   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4221   ierr = PetscFree(status);CHKERRQ(ierr);
4222 
4223   /* compute a local seq matrix in each processor */
4224   /*----------------------------------------------*/
4225   /* allocate bi array and free space for accumulating nonzero column info */
4226   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4227   bi[0] = 0;
4228 
4229   /* create and initialize a linked list */
4230   nlnk = N+1;
4231   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4232 
4233   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4234   len  = ai[owners[rank+1]] - ai[owners[rank]];
4235   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4236 
4237   current_space = free_space;
4238 
4239   /* determine symbolic info for each local row */
4240   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4241 
4242   for (k=0; k<merge->nrecv; k++) {
4243     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4244     nrows       = *buf_ri_k[k];
4245     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4246     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4247   }
4248 
4249   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4250   len  = 0;
4251   for (i=0; i<m; i++) {
4252     bnzi = 0;
4253     /* add local non-zero cols of this proc's seqmat into lnk */
4254     arow  = owners[rank] + i;
4255     anzi  = ai[arow+1] - ai[arow];
4256     aj    = a->j + ai[arow];
4257     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4258     bnzi += nlnk;
4259     /* add received col data into lnk */
4260     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4261       if (i == *nextrow[k]) { /* i-th row */
4262         anzi  = *(nextai[k]+1) - *nextai[k];
4263         aj    = buf_rj[k] + *nextai[k];
4264         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4265         bnzi += nlnk;
4266         nextrow[k]++; nextai[k]++;
4267       }
4268     }
4269     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4270 
4271     /* if free space is not available, make more free space */
4272     if (current_space->local_remaining<bnzi) {
4273       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4274       nspacedouble++;
4275     }
4276     /* copy data into free space, then initialize lnk */
4277     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4278     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4279 
4280     current_space->array           += bnzi;
4281     current_space->local_used      += bnzi;
4282     current_space->local_remaining -= bnzi;
4283 
4284     bi[i+1] = bi[i] + bnzi;
4285   }
4286 
4287   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4288 
4289   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4290   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4291   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4292 
4293   /* create symbolic parallel matrix B_mpi */
4294   /*---------------------------------------*/
4295   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4296   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4297   if (n==PETSC_DECIDE) {
4298     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4299   } else {
4300     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4301   }
4302   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4303   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4304   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4305   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4306   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4307 
4308   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4309   B_mpi->assembled    = PETSC_FALSE;
4310   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4311   merge->bi           = bi;
4312   merge->bj           = bj;
4313   merge->buf_ri       = buf_ri;
4314   merge->buf_rj       = buf_rj;
4315   merge->coi          = NULL;
4316   merge->coj          = NULL;
4317   merge->owners_co    = NULL;
4318 
4319   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4320 
4321   /* attach the supporting struct to B_mpi for reuse */
4322   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4323   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4324   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4325   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4326   *mpimat = B_mpi;
4327 
4328   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4329   PetscFunctionReturn(0);
4330 }
4331 
4332 #undef __FUNCT__
4333 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4334 /*@C
4335       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4336                  matrices from each processor
4337 
4338     Collective on MPI_Comm
4339 
4340    Input Parameters:
4341 +    comm - the communicators the parallel matrix will live on
4342 .    seqmat - the input sequential matrices
4343 .    m - number of local rows (or PETSC_DECIDE)
4344 .    n - number of local columns (or PETSC_DECIDE)
4345 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4346 
4347    Output Parameter:
4348 .    mpimat - the parallel matrix generated
4349 
4350     Level: advanced
4351 
4352    Notes:
4353      The dimensions of the sequential matrix in each processor MUST be the same.
4354      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4355      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4356 @*/
4357 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4358 {
4359   PetscErrorCode ierr;
4360   PetscMPIInt    size;
4361 
4362   PetscFunctionBegin;
4363   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4364   if (size == 1) {
4365     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4366     if (scall == MAT_INITIAL_MATRIX) {
4367       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4368     } else {
4369       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4370     }
4371     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4372     PetscFunctionReturn(0);
4373   }
4374   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4375   if (scall == MAT_INITIAL_MATRIX) {
4376     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4377   }
4378   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4379   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4380   PetscFunctionReturn(0);
4381 }
4382 
4383 #undef __FUNCT__
4384 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4385 /*@
4386      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4387           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4388           with MatGetSize()
4389 
4390     Not Collective
4391 
4392    Input Parameters:
4393 +    A - the matrix
4394 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4395 
4396    Output Parameter:
4397 .    A_loc - the local sequential matrix generated
4398 
4399     Level: developer
4400 
4401 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4402 
4403 @*/
4404 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4405 {
4406   PetscErrorCode ierr;
4407   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4408   Mat_SeqAIJ     *mat,*a,*b;
4409   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4410   MatScalar      *aa,*ba,*cam;
4411   PetscScalar    *ca;
4412   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4413   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4414   PetscBool      match;
4415   MPI_Comm       comm;
4416   PetscMPIInt    size;
4417 
4418   PetscFunctionBegin;
4419   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4420   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4421   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4422   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4423   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4424 
4425   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4426   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4427   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4428   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4429   aa = a->a; ba = b->a;
4430   if (scall == MAT_INITIAL_MATRIX) {
4431     if (size == 1) {
4432       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4433       PetscFunctionReturn(0);
4434     }
4435 
4436     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4437     ci[0] = 0;
4438     for (i=0; i<am; i++) {
4439       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4440     }
4441     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4442     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4443     k    = 0;
4444     for (i=0; i<am; i++) {
4445       ncols_o = bi[i+1] - bi[i];
4446       ncols_d = ai[i+1] - ai[i];
4447       /* off-diagonal portion of A */
4448       for (jo=0; jo<ncols_o; jo++) {
4449         col = cmap[*bj];
4450         if (col >= cstart) break;
4451         cj[k]   = col; bj++;
4452         ca[k++] = *ba++;
4453       }
4454       /* diagonal portion of A */
4455       for (j=0; j<ncols_d; j++) {
4456         cj[k]   = cstart + *aj++;
4457         ca[k++] = *aa++;
4458       }
4459       /* off-diagonal portion of A */
4460       for (j=jo; j<ncols_o; j++) {
4461         cj[k]   = cmap[*bj++];
4462         ca[k++] = *ba++;
4463       }
4464     }
4465     /* put together the new matrix */
4466     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4467     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4468     /* Since these are PETSc arrays, change flags to free them as necessary. */
4469     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4470     mat->free_a  = PETSC_TRUE;
4471     mat->free_ij = PETSC_TRUE;
4472     mat->nonew   = 0;
4473   } else if (scall == MAT_REUSE_MATRIX) {
4474     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4475     ci = mat->i; cj = mat->j; cam = mat->a;
4476     for (i=0; i<am; i++) {
4477       /* off-diagonal portion of A */
4478       ncols_o = bi[i+1] - bi[i];
4479       for (jo=0; jo<ncols_o; jo++) {
4480         col = cmap[*bj];
4481         if (col >= cstart) break;
4482         *cam++ = *ba++; bj++;
4483       }
4484       /* diagonal portion of A */
4485       ncols_d = ai[i+1] - ai[i];
4486       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4487       /* off-diagonal portion of A */
4488       for (j=jo; j<ncols_o; j++) {
4489         *cam++ = *ba++; bj++;
4490       }
4491     }
4492   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4493   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4494   PetscFunctionReturn(0);
4495 }
4496 
4497 #undef __FUNCT__
4498 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4499 /*@C
4500      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4501 
4502     Not Collective
4503 
4504    Input Parameters:
4505 +    A - the matrix
4506 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4507 -    row, col - index sets of rows and columns to extract (or NULL)
4508 
4509    Output Parameter:
4510 .    A_loc - the local sequential matrix generated
4511 
4512     Level: developer
4513 
4514 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4515 
4516 @*/
4517 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4518 {
4519   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4520   PetscErrorCode ierr;
4521   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4522   IS             isrowa,iscola;
4523   Mat            *aloc;
4524   PetscBool      match;
4525 
4526   PetscFunctionBegin;
4527   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4528   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4529   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4530   if (!row) {
4531     start = A->rmap->rstart; end = A->rmap->rend;
4532     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4533   } else {
4534     isrowa = *row;
4535   }
4536   if (!col) {
4537     start = A->cmap->rstart;
4538     cmap  = a->garray;
4539     nzA   = a->A->cmap->n;
4540     nzB   = a->B->cmap->n;
4541     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
4542     ncols = 0;
4543     for (i=0; i<nzB; i++) {
4544       if (cmap[i] < start) idx[ncols++] = cmap[i];
4545       else break;
4546     }
4547     imark = i;
4548     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4549     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4550     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4551   } else {
4552     iscola = *col;
4553   }
4554   if (scall != MAT_INITIAL_MATRIX) {
4555     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4556     aloc[0] = *A_loc;
4557   }
4558   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4559   *A_loc = aloc[0];
4560   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4561   if (!row) {
4562     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4563   }
4564   if (!col) {
4565     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4566   }
4567   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4568   PetscFunctionReturn(0);
4569 }
4570 
4571 #undef __FUNCT__
4572 #define __FUNCT__ "MatGetBrowsOfAcols"
4573 /*@C
4574     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4575 
4576     Collective on Mat
4577 
4578    Input Parameters:
4579 +    A,B - the matrices in mpiaij format
4580 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4581 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4582 
4583    Output Parameter:
4584 +    rowb, colb - index sets of rows and columns of B to extract
4585 -    B_seq - the sequential matrix generated
4586 
4587     Level: developer
4588 
4589 @*/
4590 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4591 {
4592   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4593   PetscErrorCode ierr;
4594   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4595   IS             isrowb,iscolb;
4596   Mat            *bseq=NULL;
4597 
4598   PetscFunctionBegin;
4599   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4600     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4601   }
4602   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4603 
4604   if (scall == MAT_INITIAL_MATRIX) {
4605     start = A->cmap->rstart;
4606     cmap  = a->garray;
4607     nzA   = a->A->cmap->n;
4608     nzB   = a->B->cmap->n;
4609     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
4610     ncols = 0;
4611     for (i=0; i<nzB; i++) {  /* row < local row index */
4612       if (cmap[i] < start) idx[ncols++] = cmap[i];
4613       else break;
4614     }
4615     imark = i;
4616     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4617     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4618     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4619     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4620   } else {
4621     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4622     isrowb  = *rowb; iscolb = *colb;
4623     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4624     bseq[0] = *B_seq;
4625   }
4626   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4627   *B_seq = bseq[0];
4628   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4629   if (!rowb) {
4630     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4631   } else {
4632     *rowb = isrowb;
4633   }
4634   if (!colb) {
4635     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4636   } else {
4637     *colb = iscolb;
4638   }
4639   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4640   PetscFunctionReturn(0);
4641 }
4642 
4643 #undef __FUNCT__
4644 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4645 /*
4646     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4647     of the OFF-DIAGONAL portion of local A
4648 
4649     Collective on Mat
4650 
4651    Input Parameters:
4652 +    A,B - the matrices in mpiaij format
4653 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4654 
4655    Output Parameter:
4656 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4657 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4658 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4659 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4660 
4661     Level: developer
4662 
4663 */
4664 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4665 {
4666   VecScatter_MPI_General *gen_to,*gen_from;
4667   PetscErrorCode         ierr;
4668   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4669   Mat_SeqAIJ             *b_oth;
4670   VecScatter             ctx =a->Mvctx;
4671   MPI_Comm               comm;
4672   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4673   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4674   PetscScalar            *rvalues,*svalues;
4675   MatScalar              *b_otha,*bufa,*bufA;
4676   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4677   MPI_Request            *rwaits = NULL,*swaits = NULL;
4678   MPI_Status             *sstatus,rstatus;
4679   PetscMPIInt            jj,size;
4680   PetscInt               *cols,sbs,rbs;
4681   PetscScalar            *vals;
4682 
4683   PetscFunctionBegin;
4684   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4685   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4686   if (size == 1) PetscFunctionReturn(0);
4687 
4688   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4689     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4690   }
4691   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4692   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4693 
4694   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4695   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4696   rvalues  = gen_from->values; /* holds the length of receiving row */
4697   svalues  = gen_to->values;   /* holds the length of sending row */
4698   nrecvs   = gen_from->n;
4699   nsends   = gen_to->n;
4700 
4701   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4702   srow    = gen_to->indices;    /* local row index to be sent */
4703   sstarts = gen_to->starts;
4704   sprocs  = gen_to->procs;
4705   sstatus = gen_to->sstatus;
4706   sbs     = gen_to->bs;
4707   rstarts = gen_from->starts;
4708   rprocs  = gen_from->procs;
4709   rbs     = gen_from->bs;
4710 
4711   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4712   if (scall == MAT_INITIAL_MATRIX) {
4713     /* i-array */
4714     /*---------*/
4715     /*  post receives */
4716     for (i=0; i<nrecvs; i++) {
4717       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4718       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4719       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4720     }
4721 
4722     /* pack the outgoing message */
4723     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4724 
4725     sstartsj[0] = 0;
4726     rstartsj[0] = 0;
4727     len         = 0; /* total length of j or a array to be sent */
4728     k           = 0;
4729     for (i=0; i<nsends; i++) {
4730       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4731       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4732       for (j=0; j<nrows; j++) {
4733         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4734         for (l=0; l<sbs; l++) {
4735           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4736 
4737           rowlen[j*sbs+l] = ncols;
4738 
4739           len += ncols;
4740           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4741         }
4742         k++;
4743       }
4744       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4745 
4746       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4747     }
4748     /* recvs and sends of i-array are completed */
4749     i = nrecvs;
4750     while (i--) {
4751       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4752     }
4753     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4754 
4755     /* allocate buffers for sending j and a arrays */
4756     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
4757     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
4758 
4759     /* create i-array of B_oth */
4760     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
4761 
4762     b_othi[0] = 0;
4763     len       = 0; /* total length of j or a array to be received */
4764     k         = 0;
4765     for (i=0; i<nrecvs; i++) {
4766       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4767       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4768       for (j=0; j<nrows; j++) {
4769         b_othi[k+1] = b_othi[k] + rowlen[j];
4770         len        += rowlen[j]; k++;
4771       }
4772       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4773     }
4774 
4775     /* allocate space for j and a arrrays of B_oth */
4776     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
4777     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
4778 
4779     /* j-array */
4780     /*---------*/
4781     /*  post receives of j-array */
4782     for (i=0; i<nrecvs; i++) {
4783       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4784       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4785     }
4786 
4787     /* pack the outgoing message j-array */
4788     k = 0;
4789     for (i=0; i<nsends; i++) {
4790       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4791       bufJ  = bufj+sstartsj[i];
4792       for (j=0; j<nrows; j++) {
4793         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4794         for (ll=0; ll<sbs; ll++) {
4795           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4796           for (l=0; l<ncols; l++) {
4797             *bufJ++ = cols[l];
4798           }
4799           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4800         }
4801       }
4802       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4803     }
4804 
4805     /* recvs and sends of j-array are completed */
4806     i = nrecvs;
4807     while (i--) {
4808       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4809     }
4810     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4811   } else if (scall == MAT_REUSE_MATRIX) {
4812     sstartsj = *startsj_s;
4813     rstartsj = *startsj_r;
4814     bufa     = *bufa_ptr;
4815     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4816     b_otha   = b_oth->a;
4817   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4818 
4819   /* a-array */
4820   /*---------*/
4821   /*  post receives of a-array */
4822   for (i=0; i<nrecvs; i++) {
4823     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4824     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4825   }
4826 
4827   /* pack the outgoing message a-array */
4828   k = 0;
4829   for (i=0; i<nsends; i++) {
4830     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4831     bufA  = bufa+sstartsj[i];
4832     for (j=0; j<nrows; j++) {
4833       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4834       for (ll=0; ll<sbs; ll++) {
4835         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4836         for (l=0; l<ncols; l++) {
4837           *bufA++ = vals[l];
4838         }
4839         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4840       }
4841     }
4842     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4843   }
4844   /* recvs and sends of a-array are completed */
4845   i = nrecvs;
4846   while (i--) {
4847     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4848   }
4849   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4850   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4851 
4852   if (scall == MAT_INITIAL_MATRIX) {
4853     /* put together the new matrix */
4854     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4855 
4856     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4857     /* Since these are PETSc arrays, change flags to free them as necessary. */
4858     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4859     b_oth->free_a  = PETSC_TRUE;
4860     b_oth->free_ij = PETSC_TRUE;
4861     b_oth->nonew   = 0;
4862 
4863     ierr = PetscFree(bufj);CHKERRQ(ierr);
4864     if (!startsj_s || !bufa_ptr) {
4865       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4866       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4867     } else {
4868       *startsj_s = sstartsj;
4869       *startsj_r = rstartsj;
4870       *bufa_ptr  = bufa;
4871     }
4872   }
4873   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4874   PetscFunctionReturn(0);
4875 }
4876 
4877 #undef __FUNCT__
4878 #define __FUNCT__ "MatGetCommunicationStructs"
4879 /*@C
4880   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4881 
4882   Not Collective
4883 
4884   Input Parameters:
4885 . A - The matrix in mpiaij format
4886 
4887   Output Parameter:
4888 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4889 . colmap - A map from global column index to local index into lvec
4890 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4891 
4892   Level: developer
4893 
4894 @*/
4895 #if defined(PETSC_USE_CTABLE)
4896 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4897 #else
4898 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4899 #endif
4900 {
4901   Mat_MPIAIJ *a;
4902 
4903   PetscFunctionBegin;
4904   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4905   PetscValidPointer(lvec, 2);
4906   PetscValidPointer(colmap, 3);
4907   PetscValidPointer(multScatter, 4);
4908   a = (Mat_MPIAIJ*) A->data;
4909   if (lvec) *lvec = a->lvec;
4910   if (colmap) *colmap = a->colmap;
4911   if (multScatter) *multScatter = a->Mvctx;
4912   PetscFunctionReturn(0);
4913 }
4914 
4915 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4916 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4917 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4918 #if defined(PETSC_HAVE_ELEMENTAL)
4919 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4920 #endif
4921 
4922 #undef __FUNCT__
4923 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4924 /*
4925     Computes (B'*A')' since computing B*A directly is untenable
4926 
4927                n                       p                          p
4928         (              )       (              )         (                  )
4929       m (      A       )  *  n (       B      )   =   m (         C        )
4930         (              )       (              )         (                  )
4931 
4932 */
4933 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4934 {
4935   PetscErrorCode ierr;
4936   Mat            At,Bt,Ct;
4937 
4938   PetscFunctionBegin;
4939   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4940   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4941   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4942   ierr = MatDestroy(&At);CHKERRQ(ierr);
4943   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4944   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4945   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4946   PetscFunctionReturn(0);
4947 }
4948 
4949 #undef __FUNCT__
4950 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4951 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4952 {
4953   PetscErrorCode ierr;
4954   PetscInt       m=A->rmap->n,n=B->cmap->n;
4955   Mat            Cmat;
4956 
4957   PetscFunctionBegin;
4958   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4959   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4960   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4961   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4962   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4963   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4964   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4965   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4966 
4967   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4968 
4969   *C = Cmat;
4970   PetscFunctionReturn(0);
4971 }
4972 
4973 /* ----------------------------------------------------------------*/
4974 #undef __FUNCT__
4975 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4976 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4977 {
4978   PetscErrorCode ierr;
4979 
4980   PetscFunctionBegin;
4981   if (scall == MAT_INITIAL_MATRIX) {
4982     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4983     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4984     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4985   }
4986   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4987   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4988   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4989   PetscFunctionReturn(0);
4990 }
4991 
4992 /*MC
4993    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4994 
4995    Options Database Keys:
4996 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4997 
4998   Level: beginner
4999 
5000 .seealso: MatCreateAIJ()
5001 M*/
5002 
5003 #undef __FUNCT__
5004 #define __FUNCT__ "MatCreate_MPIAIJ"
5005 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5006 {
5007   Mat_MPIAIJ     *b;
5008   PetscErrorCode ierr;
5009   PetscMPIInt    size;
5010 
5011   PetscFunctionBegin;
5012   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5013 
5014   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5015   B->data       = (void*)b;
5016   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5017   B->assembled  = PETSC_FALSE;
5018   B->insertmode = NOT_SET_VALUES;
5019   b->size       = size;
5020 
5021   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5022 
5023   /* build cache for off array entries formed */
5024   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5025 
5026   b->donotstash  = PETSC_FALSE;
5027   b->colmap      = 0;
5028   b->garray      = 0;
5029   b->roworiented = PETSC_TRUE;
5030 
5031   /* stuff used for matrix vector multiply */
5032   b->lvec  = NULL;
5033   b->Mvctx = NULL;
5034 
5035   /* stuff for MatGetRow() */
5036   b->rowindices   = 0;
5037   b->rowvalues    = 0;
5038   b->getrowactive = PETSC_FALSE;
5039 
5040   /* flexible pointer used in CUSP/CUSPARSE classes */
5041   b->spptr = NULL;
5042 
5043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5053 #if defined(PETSC_HAVE_ELEMENTAL)
5054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5055 #endif
5056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5059   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5060   PetscFunctionReturn(0);
5061 }
5062 
5063 #undef __FUNCT__
5064 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5065 /*@C
5066      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5067          and "off-diagonal" part of the matrix in CSR format.
5068 
5069    Collective on MPI_Comm
5070 
5071    Input Parameters:
5072 +  comm - MPI communicator
5073 .  m - number of local rows (Cannot be PETSC_DECIDE)
5074 .  n - This value should be the same as the local size used in creating the
5075        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5076        calculated if N is given) For square matrices n is almost always m.
5077 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5078 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5079 .   i - row indices for "diagonal" portion of matrix
5080 .   j - column indices
5081 .   a - matrix values
5082 .   oi - row indices for "off-diagonal" portion of matrix
5083 .   oj - column indices
5084 -   oa - matrix values
5085 
5086    Output Parameter:
5087 .   mat - the matrix
5088 
5089    Level: advanced
5090 
5091    Notes:
5092        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5093        must free the arrays once the matrix has been destroyed and not before.
5094 
5095        The i and j indices are 0 based
5096 
5097        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5098 
5099        This sets local rows and cannot be used to set off-processor values.
5100 
5101        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5102        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5103        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5104        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5105        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5106        communication if it is known that only local entries will be set.
5107 
5108 .keywords: matrix, aij, compressed row, sparse, parallel
5109 
5110 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5111           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5112 C@*/
5113 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5114 {
5115   PetscErrorCode ierr;
5116   Mat_MPIAIJ     *maij;
5117 
5118   PetscFunctionBegin;
5119   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5120   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5121   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5122   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5123   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5124   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5125   maij = (Mat_MPIAIJ*) (*mat)->data;
5126 
5127   (*mat)->preallocated = PETSC_TRUE;
5128 
5129   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5130   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5131 
5132   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5133   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5134 
5135   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5136   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5137   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5138   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5139 
5140   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5141   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5142   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5143   PetscFunctionReturn(0);
5144 }
5145 
5146 /*
5147     Special version for direct calls from Fortran
5148 */
5149 #include <petsc-private/fortranimpl.h>
5150 
5151 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5152 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5153 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5154 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5155 #endif
5156 
5157 /* Change these macros so can be used in void function */
5158 #undef CHKERRQ
5159 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5160 #undef SETERRQ2
5161 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5162 #undef SETERRQ3
5163 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5164 #undef SETERRQ
5165 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5166 
5167 #undef __FUNCT__
5168 #define __FUNCT__ "matsetvaluesmpiaij_"
5169 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5170 {
5171   Mat            mat  = *mmat;
5172   PetscInt       m    = *mm, n = *mn;
5173   InsertMode     addv = *maddv;
5174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5175   PetscScalar    value;
5176   PetscErrorCode ierr;
5177 
5178   MatCheckPreallocated(mat,1);
5179   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5180 
5181 #if defined(PETSC_USE_DEBUG)
5182   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5183 #endif
5184   {
5185     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5186     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5187     PetscBool roworiented = aij->roworiented;
5188 
5189     /* Some Variables required in the macro */
5190     Mat        A                 = aij->A;
5191     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5192     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5193     MatScalar  *aa               = a->a;
5194     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5195     Mat        B                 = aij->B;
5196     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5197     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5198     MatScalar  *ba               = b->a;
5199 
5200     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5201     PetscInt  nonew = a->nonew;
5202     MatScalar *ap1,*ap2;
5203 
5204     PetscFunctionBegin;
5205     for (i=0; i<m; i++) {
5206       if (im[i] < 0) continue;
5207 #if defined(PETSC_USE_DEBUG)
5208       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5209 #endif
5210       if (im[i] >= rstart && im[i] < rend) {
5211         row      = im[i] - rstart;
5212         lastcol1 = -1;
5213         rp1      = aj + ai[row];
5214         ap1      = aa + ai[row];
5215         rmax1    = aimax[row];
5216         nrow1    = ailen[row];
5217         low1     = 0;
5218         high1    = nrow1;
5219         lastcol2 = -1;
5220         rp2      = bj + bi[row];
5221         ap2      = ba + bi[row];
5222         rmax2    = bimax[row];
5223         nrow2    = bilen[row];
5224         low2     = 0;
5225         high2    = nrow2;
5226 
5227         for (j=0; j<n; j++) {
5228           if (roworiented) value = v[i*n+j];
5229           else value = v[i+j*m];
5230           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5231           if (in[j] >= cstart && in[j] < cend) {
5232             col = in[j] - cstart;
5233             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5234           } else if (in[j] < 0) continue;
5235 #if defined(PETSC_USE_DEBUG)
5236           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5237 #endif
5238           else {
5239             if (mat->was_assembled) {
5240               if (!aij->colmap) {
5241                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5242               }
5243 #if defined(PETSC_USE_CTABLE)
5244               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5245               col--;
5246 #else
5247               col = aij->colmap[in[j]] - 1;
5248 #endif
5249               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5250                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5251                 col  =  in[j];
5252                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5253                 B     = aij->B;
5254                 b     = (Mat_SeqAIJ*)B->data;
5255                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5256                 rp2   = bj + bi[row];
5257                 ap2   = ba + bi[row];
5258                 rmax2 = bimax[row];
5259                 nrow2 = bilen[row];
5260                 low2  = 0;
5261                 high2 = nrow2;
5262                 bm    = aij->B->rmap->n;
5263                 ba    = b->a;
5264               }
5265             } else col = in[j];
5266             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5267           }
5268         }
5269       } else if (!aij->donotstash) {
5270         if (roworiented) {
5271           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5272         } else {
5273           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5274         }
5275       }
5276     }
5277   }
5278   PetscFunctionReturnVoid();
5279 }
5280 
5281