
#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petsc-private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.
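
   Example (a minimal sketch; error checking with CHKERRQ() is omitted, and comm, m, n, M, N,
   d_nz and o_nz stand for values supplied by the caller):
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,d_nz,NULL);
     MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
.ve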

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.
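
   Example (a sketch; A is an existing Mat, and the type is more commonly selected at
   runtime with -mat_type aijcrl):
.vb
     MatSetType(A,MATAIJCRL);
.ve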

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
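  /* Two passes: first count the locally empty rows (rows with no stored entries, or only
     zero entries); then, if any process has such rows, gather the indices of the rows to keep */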
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

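/*
   Column norms are accumulated in a work array of global length n, indexed by global
   column (garray[] maps off-diagonal block columns to global columns), then combined
   across processes with MPI_Allreduce: MPIU_MAX for the infinity norm, MPIU_SUM otherwise.
*/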
#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
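
    Example usage (a sketch; gmat is a MATSEQAIJ matrix whose entries are significant
    on rank 0, and m is this process's local row count):
      Mat dmat;
      MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);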
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal nonzeros in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal nonzeros in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over the numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order N integer array) but the lookup is fast.
*/
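
/*
   Lookup sketch (mirrors the use in MatSetValues_MPIAIJ() below; gcol is a global
   column index expected to lie in the off-diagonal part):

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
       col--;
     #else
       col = aij->colmap[gcol] - 1;
     #endif

   A result of col < 0 means gcol is not present in the off-diagonal block.
*/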
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

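/*
   The two macros below insert (row,col,value) into the local diagonal (A) or
   off-diagonal (B) SeqAIJ block: a short binary search narrows the range, a linear
   scan locates the column, and a missing entry is inserted by shifting the later
   entries of the row up by one (reallocating first if the row is full).
*/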
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
  { \
    if (col <= lastcol1) low1 = 0;     \
    else                high1 = nrow1; \
    lastcol1 = col;                    \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) ap2[_i] += value;         \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */
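  /* v[] holds the entire row in ascending global column order: the off-diagonal entries
     left of the diagonal block, then the diagonal block, then the entries to its right */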

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

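/*
   Locally owned rows are inserted directly into the diagonal (A) or off-diagonal (B)
   block via the macros above; entries for rows owned by other processes are stashed
   here and communicated during MatAssemblyBegin/End().
*/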
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt      *owners = A->rmap->range;
  PetscInt       n      = A->rmap->n;
  PetscSF        sf;
  PetscInt      *lrows;
  PetscSFNode   *rrows;
  PetscInt       r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    if (A->nooffproczerorows) {
      if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
      lrows[len++] = idx - owners[p];
    } else {
      rrows[r].rank = p;
      rrows[r].index = rows[r] - owners[p];
    }
  }
  if (!A->nooffproczerorows) {
    ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
    /* Collect flags for rows to be zeroed */
    ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
    /* Compress and put in row numbers */
    for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  }
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
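  /* build a global mask vector with a 1 in each zeroed row and scatter it to the ghost
     ordering; a nonzero mask entry then flags an off-diagonal column that must be zeroed */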
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

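/*
   y = A x: a scatter gathers the ghost values of x while the product with the local
   diagonal block proceeds; the off-diagonal block then multiplies the gathered ghost
   values and adds the result into y, overlapping communication with computation.
*/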
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values were actually received in the Begin() but we still need to call this no-op End() */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a call to MatGetSubMatrices(). */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
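  /* each local row is written in ascending global column order: off-diagonal columns
     below cstart, then the diagonal block, then the remaining off-diagonal columns */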
1218   nzmax = nz; /* th processor needs space a largest processor needs */
1219   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1220   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1221   cnt   = 0;
1222   for (i=0; i<mat->rmap->n; i++) {
1223     for (j=B->i[i]; j<B->i[i+1]; j++) {
1224       if ((col = garray[B->j[j]]) > cstart) break;
1225       column_indices[cnt++] = col;
1226     }
1227     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1228     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1229   }
1230   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1231 
1232   /* store the column indices to the file */
1233   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1234   if (!rank) {
1235     MPI_Status status;
1236     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1237     for (i=1; i<size; i++) {
1238       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1239       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1240       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1241       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1242       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1243     }
1244     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1245   } else {
1246     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1247     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1248     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1249     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1250   }
1251   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1252 
1253   /* load up the local column values */
1254   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1255   cnt  = 0;
1256   for (i=0; i<mat->rmap->n; i++) {
1257     for (j=B->i[i]; j<B->i[i+1]; j++) {
1258       if (garray[B->j[j]] > cstart) break;
1259       column_values[cnt++] = B->a[j];
1260     }
1261     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1262     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1263   }
1264   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1265 
1266   /* store the column values to the file */
1267   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1268   if (!rank) {
1269     MPI_Status status;
1270     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1271     for (i=1; i<size; i++) {
1272       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1273       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1274       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1275       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1277     }
1278     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1279   } else {
1280     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1281     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1283     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1284   }
1285   ierr = PetscFree(column_values);CHKERRQ(ierr);
1286 
1287   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1288   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1289   PetscFunctionReturn(0);
1290 }
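
/*
   Usage sketch: storing an assembled MPIAIJ matrix through a binary viewer,
   which reaches the routine above when more than one process is used.  The
   matrix mat is assumed assembled; the file name is arbitrary.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/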
1291 
1292 #include <petscdraw.h>
1293 #undef __FUNCT__
1294 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1295 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1296 {
1297   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1298   PetscErrorCode    ierr;
1299   PetscMPIInt       rank = aij->rank,size = aij->size;
1300   PetscBool         isdraw,iascii,isbinary;
1301   PetscViewer       sviewer;
1302   PetscViewerFormat format;
1303 
1304   PetscFunctionBegin;
1305   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1306   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1307   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1308   if (iascii) {
1309     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1310     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1311       MatInfo   info;
1312       PetscInt  *inodes;
1313 
1314       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1315       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1316       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1317       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1318       if (!inodes) {
1319         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1320                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1321       } else {
1322         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1323                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1324       }
1325       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1326       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1327       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1328       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1329       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1330       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1331       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1332       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1333       PetscFunctionReturn(0);
1334     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1335       PetscInt inodecount,inodelimit,*inodes;
1336       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1337       if (inodes) {
1338         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1339       } else {
1340         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1341       }
1342       PetscFunctionReturn(0);
1343     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1344       PetscFunctionReturn(0);
1345     }
1346   } else if (isbinary) {
1347     if (size == 1) {
1348       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1349       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1350     } else {
1351       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1352     }
1353     PetscFunctionReturn(0);
1354   } else if (isdraw) {
1355     PetscDraw draw;
1356     PetscBool isnull;
1357     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1358     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1359   }
1360 
1361   {
1362     /* assemble the entire matrix onto first processor. */
1363     Mat        A;
1364     Mat_SeqAIJ *Aloc;
1365     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1366     MatScalar  *a;
1367 
1368     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1369     if (!rank) {
1370       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1371     } else {
1372       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1373     }
1374     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1375     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1376     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1377     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1378     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1379 
1380     /* copy over the A part */
1381     Aloc = (Mat_SeqAIJ*)aij->A->data;
1382     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1383     row  = mat->rmap->rstart;
1384     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1385     for (i=0; i<m; i++) {
1386       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1387       row++;
1388       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1389     }
1390     aj = Aloc->j;
1391     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1392 
1393     /* copy over the B part */
1394     Aloc = (Mat_SeqAIJ*)aij->B->data;
1395     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1396     row  = mat->rmap->rstart;
1397     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1398     ct   = cols;
1399     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1400     for (i=0; i<m; i++) {
1401       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1402       row++;
1403       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1404     }
1405     ierr = PetscFree(ct);CHKERRQ(ierr);
1406     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1407     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1408     /*
1409        Every process must participate in the viewer calls below since the graphics
1410        waits are synchronized across all processes that share the PetscDraw object
1411     */
1412     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1413     if (!rank) {
1414       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1415     }
1416     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1417     ierr = MatDestroy(&A);CHKERRQ(ierr);
1418   }
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatView_MPIAIJ"
1424 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1425 {
1426   PetscErrorCode ierr;
1427   PetscBool      iascii,isdraw,issocket,isbinary;
1428 
1429   PetscFunctionBegin;
1430   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1431   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1432   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1433   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1434   if (iascii || isdraw || isbinary || issocket) {
1435     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1436   }
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatSOR_MPIAIJ"
1442 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1443 {
1444   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1445   PetscErrorCode ierr;
1446   Vec            bb1 = 0;
1447   PetscBool      hasop;
1448 
1449   PetscFunctionBegin;
1450   if (flag == SOR_APPLY_UPPER) {
1451     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1452     PetscFunctionReturn(0);
1453   }
1454 
1455   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || (flag & SOR_EISENSTAT)) {
1456     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1457   }
1458 
1459   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1460     if (flag & SOR_ZERO_INITIAL_GUESS) {
1461       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1462       its--;
1463     }
1464 
1465     while (its--) {
1466       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1467       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1468 
1469       /* update rhs: bb1 = bb - B*x */
1470       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1471       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1472 
1473       /* local sweep */
1474       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1475     }
1476   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1477     if (flag & SOR_ZERO_INITIAL_GUESS) {
1478       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1479       its--;
1480     }
1481     while (its--) {
1482       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1483       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1484 
1485       /* update rhs: bb1 = bb - B*x */
1486       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1487       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1488 
1489       /* local sweep */
1490       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1491     }
1492   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1493     if (flag & SOR_ZERO_INITIAL_GUESS) {
1494       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1495       its--;
1496     }
1497     while (its--) {
1498       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1499       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1500 
1501       /* update rhs: bb1 = bb - B*x */
1502       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1503       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1504 
1505       /* local sweep */
1506       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1507     }
1508   } else if (flag & SOR_EISENSTAT) {
1509     Vec xx1;
1510 
1511     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1512     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1513 
1514     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1515     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1516     if (!mat->diag) {
1517       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1518       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1519     }
1520     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1521     if (hasop) {
1522       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1523     } else {
1524       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1525     }
1526     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1527 
1528     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1529 
1530     /* local sweep */
1531     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1532     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1533     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1534   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1535 
1536   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1537   PetscFunctionReturn(0);
1538 }
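
/*
   Usage sketch: one processor-local symmetric sweep through the public
   interface; omega, fshift, and the iteration counts below are illustrative.

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   In practice this routine is usually reached through PCSOR (-pc_type sor).
*/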
1539 
1540 #undef __FUNCT__
1541 #define __FUNCT__ "MatPermute_MPIAIJ"
1542 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1543 {
1544   Mat            aA,aB,Aperm;
1545   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1546   PetscScalar    *aa,*ba;
1547   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1548   PetscSF        rowsf,sf;
1549   IS             parcolp = NULL;
1550   PetscBool      done;
1551   PetscErrorCode ierr;
1552 
1553   PetscFunctionBegin;
1554   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1555   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1556   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1557   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1558 
1559   /* Invert row permutation to find out where my rows should go */
1560   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1561   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1562   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1563   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1564   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1565   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1566 
1567   /* Invert column permutation to find out where my columns should go */
1568   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1569   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1570   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1571   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1572   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1573   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1574   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1575 
1576   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1577   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1578   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1579 
1580   /* Find out where my gcols should go */
1581   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1582   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1583   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1584   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1585   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1586   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1587   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1588   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1589 
1590   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1591   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1592   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1593   for (i=0; i<m; i++) {
1594     PetscInt row = rdest[i],rowner;
1595     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1596     for (j=ai[i]; j<ai[i+1]; j++) {
1597       PetscInt cowner,col = cdest[aj[j]];
1598       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1599       if (rowner == cowner) dnnz[i]++;
1600       else onnz[i]++;
1601     }
1602     for (j=bi[i]; j<bi[i+1]; j++) {
1603       PetscInt cowner,col = gcdest[bj[j]];
1604       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1605       if (rowner == cowner) dnnz[i]++;
1606       else onnz[i]++;
1607     }
1608   }
1609   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1610   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1611   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1612   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1613   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1614 
1615   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1616   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1617   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1618   for (i=0; i<m; i++) {
1619     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1620     PetscInt j0,rowlen;
1621     rowlen = ai[i+1] - ai[i];
1622     for (j0=j=0; j<rowlen; j0=j) { /* rowlen may exceed the scratch length m, so insert the row in batches */
1623       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1624       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1625     }
1626     rowlen = bi[i+1] - bi[i];
1627     for (j0=j=0; j<rowlen; j0=j) {
1628       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1629       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1630     }
1631   }
1632   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1633   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1634   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1635   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1636   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1637   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1638   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1639   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1640   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1641   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1642   *B = Aperm;
1643   PetscFunctionReturn(0);
1644 }
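
/*
   Usage sketch: the routine above is reached through MatPermute(); rowp and
   colp are parallel index sets holding the global permutations.  Here m and
   rowindices are assumed to hold the local permutation entries, and reusing
   the row permutation for the columns is purely illustrative.

     IS  rowp;
     Mat Aperm;
     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),m,rowindices,PETSC_COPY_VALUES,&rowp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,rowp,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
*/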
1645 
1646 #undef __FUNCT__
1647 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1648 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1649 {
1650   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1651   Mat            A    = mat->A,B = mat->B;
1652   PetscErrorCode ierr;
1653   PetscReal      isend[5],irecv[5];
1654 
1655   PetscFunctionBegin;
1656   info->block_size = 1.0;
1657   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1658 
1659   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1660   isend[3] = info->memory;  isend[4] = info->mallocs;
1661 
1662   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1663 
1664   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1665   isend[3] += info->memory;  isend[4] += info->mallocs;
1666   if (flag == MAT_LOCAL) {
1667     info->nz_used      = isend[0];
1668     info->nz_allocated = isend[1];
1669     info->nz_unneeded  = isend[2];
1670     info->memory       = isend[3];
1671     info->mallocs      = isend[4];
1672   } else if (flag == MAT_GLOBAL_MAX) {
1673     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1674 
1675     info->nz_used      = irecv[0];
1676     info->nz_allocated = irecv[1];
1677     info->nz_unneeded  = irecv[2];
1678     info->memory       = irecv[3];
1679     info->mallocs      = irecv[4];
1680   } else if (flag == MAT_GLOBAL_SUM) {
1681     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1682 
1683     info->nz_used      = irecv[0];
1684     info->nz_allocated = irecv[1];
1685     info->nz_unneeded  = irecv[2];
1686     info->memory       = irecv[3];
1687     info->mallocs      = irecv[4];
1688   }
1689   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1690   info->fill_ratio_needed = 0;
1691   info->factor_mallocs    = 0;
1692   PetscFunctionReturn(0);
1693 }
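
/*
   Usage sketch: MAT_GLOBAL_SUM triggers the MPI_Allreduce above so that every
   process sees the global totals; nz_used is one of several fields filled in.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"global nonzeros used: %g\n",(double)info.nz_used);CHKERRQ(ierr);
*/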
1694 
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatSetOption_MPIAIJ"
1697 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1698 {
1699   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1700   PetscErrorCode ierr;
1701 
1702   PetscFunctionBegin;
1703   switch (op) {
1704   case MAT_NEW_NONZERO_LOCATIONS:
1705   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1706   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1707   case MAT_KEEP_NONZERO_PATTERN:
1708   case MAT_NEW_NONZERO_LOCATION_ERR:
1709   case MAT_USE_INODES:
1710   case MAT_IGNORE_ZERO_ENTRIES:
1711     MatCheckPreallocated(A,1);
1712     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1713     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1714     break;
1715   case MAT_ROW_ORIENTED:
1716     a->roworiented = flg;
1717 
1718     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1719     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1720     break;
1721   case MAT_NEW_DIAGONALS:
1722     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1723     break;
1724   case MAT_IGNORE_OFF_PROC_ENTRIES:
1725     a->donotstash = flg;
1726     break;
1727   case MAT_SPD:
1728     A->spd_set = PETSC_TRUE;
1729     A->spd     = flg;
1730     if (flg) {
1731       A->symmetric                  = PETSC_TRUE;
1732       A->structurally_symmetric     = PETSC_TRUE;
1733       A->symmetric_set              = PETSC_TRUE;
1734       A->structurally_symmetric_set = PETSC_TRUE;
1735     }
1736     break;
1737   case MAT_SYMMETRIC:
1738     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1739     break;
1740   case MAT_STRUCTURALLY_SYMMETRIC:
1741     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1742     break;
1743   case MAT_HERMITIAN:
1744     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1745     break;
1746   case MAT_SYMMETRY_ETERNAL:
1747     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1748     break;
1749   default:
1750     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1751   }
1752   PetscFunctionReturn(0);
1753 }
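
/*
   Usage sketch: options are forwarded to the sequential blocks a->A and a->B
   where that is meaningful, e.g.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
*/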
1754 
1755 #undef __FUNCT__
1756 #define __FUNCT__ "MatGetRow_MPIAIJ"
1757 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1758 {
1759   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1760   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1761   PetscErrorCode ierr;
1762   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1763   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1764   PetscInt       *cmap,*idx_p;
1765 
1766   PetscFunctionBegin;
1767   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1768   mat->getrowactive = PETSC_TRUE;
1769 
1770   if (!mat->rowvalues && (idx || v)) {
1771     /*
1772         allocate enough space to hold information from the longest row.
1773     */
1774     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1775     PetscInt   max = 1,tmp;
1776     for (i=0; i<matin->rmap->n; i++) {
1777       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1778       if (max < tmp) max = tmp;
1779     }
1780     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1781   }
1782 
1783   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1784   lrow = row - rstart;
1785 
1786   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1787   if (!v)   {pvA = 0; pvB = 0;}
1788   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1789   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1790   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1791   nztot = nzA + nzB;
1792 
1793   cmap = mat->garray;
1794   if (v  || idx) {
1795     if (nztot) {
1796       /* Sort by increasing column numbers, assuming A and B already sorted */
1797       PetscInt imark = -1;
1798       if (v) {
1799         *v = v_p = mat->rowvalues;
1800         for (i=0; i<nzB; i++) {
1801           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1802           else break;
1803         }
1804         imark = i;
1805         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1806         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1807       }
1808       if (idx) {
1809         *idx = idx_p = mat->rowindices;
1810         if (imark > -1) {
1811           for (i=0; i<imark; i++) {
1812             idx_p[i] = cmap[cworkB[i]];
1813           }
1814         } else {
1815           for (i=0; i<nzB; i++) {
1816             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1817             else break;
1818           }
1819           imark = i;
1820         }
1821         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1822         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1823       }
1824     } else {
1825       if (idx) *idx = 0;
1826       if (v)   *v   = 0;
1827     }
1828   }
1829   *nz  = nztot;
1830   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1831   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 #undef __FUNCT__
1836 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1837 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1838 {
1839   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1840 
1841   PetscFunctionBegin;
1842   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1843   aij->getrowactive = PETSC_FALSE;
1844   PetscFunctionReturn(0);
1845 }
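
/*
   Usage sketch: MatGetRow()/MatRestoreRow() may only be called for locally
   owned rows and must be strictly paired (see the getrowactive guard above).

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          rstart,rend,row,ncols;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ...use cols[] and vals[] here...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/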
1846 
1847 #undef __FUNCT__
1848 #define __FUNCT__ "MatNorm_MPIAIJ"
1849 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1850 {
1851   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1852   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1853   PetscErrorCode ierr;
1854   PetscInt       i,j,cstart = mat->cmap->rstart;
1855   PetscReal      sum = 0.0;
1856   MatScalar      *v;
1857 
1858   PetscFunctionBegin;
1859   if (aij->size == 1) {
1860     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1861   } else {
1862     if (type == NORM_FROBENIUS) {
1863       v = amat->a;
1864       for (i=0; i<amat->nz; i++) {
1865         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1866       }
1867       v = bmat->a;
1868       for (i=0; i<bmat->nz; i++) {
1869         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1870       }
1871       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1872       *norm = PetscSqrtReal(*norm);
1873     } else if (type == NORM_1) { /* max column norm */
1874       PetscReal *tmp,*tmp2;
1875       PetscInt  *jj,*garray = aij->garray;
1876       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1877       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1878       *norm = 0.0;
1879       v     = amat->a; jj = amat->j;
1880       for (j=0; j<amat->nz; j++) {
1881         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1882       }
1883       v = bmat->a; jj = bmat->j;
1884       for (j=0; j<bmat->nz; j++) {
1885         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1886       }
1887       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1888       for (j=0; j<mat->cmap->N; j++) {
1889         if (tmp2[j] > *norm) *norm = tmp2[j];
1890       }
1891       ierr = PetscFree(tmp);CHKERRQ(ierr);
1892       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1893     } else if (type == NORM_INFINITY) { /* max row norm */
1894       PetscReal ntemp = 0.0;
1895       for (j=0; j<aij->A->rmap->n; j++) {
1896         v   = amat->a + amat->i[j];
1897         sum = 0.0;
1898         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1899           sum += PetscAbsScalar(*v); v++;
1900         }
1901         v = bmat->a + bmat->i[j];
1902         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1903           sum += PetscAbsScalar(*v); v++;
1904         }
1905         if (sum > ntemp) ntemp = sum;
1906       }
1907       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1908     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1909   }
1910   PetscFunctionReturn(0);
1911 }
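
/*
   Usage sketch: only NORM_1, NORM_FROBENIUS, and NORM_INFINITY are supported
   in parallel, as enforced above.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/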
1912 
1913 #undef __FUNCT__
1914 #define __FUNCT__ "MatTranspose_MPIAIJ"
1915 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1916 {
1917   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1918   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1919   PetscErrorCode ierr;
1920   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1921   PetscInt       cstart = A->cmap->rstart,ncol;
1922   Mat            B;
1923   MatScalar      *array;
1924 
1925   PetscFunctionBegin;
1926   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1927 
1928   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1929   ai = Aloc->i; aj = Aloc->j;
1930   bi = Bloc->i; bj = Bloc->j;
1931   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1932     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1933     PetscSFNode          *oloc;
1934     PETSC_UNUSED PetscSF sf;
1935 
1936     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1937     /* compute d_nnz for preallocation */
1938     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1939     for (i=0; i<ai[ma]; i++) {
1940       d_nnz[aj[i]]++;
1941       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1942     }
1943     /* compute local off-diagonal contributions */
1944     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1945     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1946     /* map those to global */
1947     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1948     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1949     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1950     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1951     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1952     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1953     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1954 
1955     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1956     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1957     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1958     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1959     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1960     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1961   } else {
1962     B    = *matout;
1963     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1964     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1965   }
1966 
1967   /* copy over the A part */
1968   array = Aloc->a;
1969   row   = A->rmap->rstart;
1970   for (i=0; i<ma; i++) {
1971     ncol = ai[i+1]-ai[i];
1972     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1973     row++;
1974     array += ncol; aj += ncol;
1975   }
1976   aj = Aloc->j;
1977   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1978 
1979   /* copy over the B part */
1980   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1981   array = Bloc->a;
1982   row   = A->rmap->rstart;
1983   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1984   cols_tmp = cols;
1985   for (i=0; i<mb; i++) {
1986     ncol = bi[i+1]-bi[i];
1987     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1988     row++;
1989     array += ncol; cols_tmp += ncol;
1990   }
1991   ierr = PetscFree(cols);CHKERRQ(ierr);
1992 
1993   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1994   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1995   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1996     *matout = B;
1997   } else {
1998     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1999   }
2000   PetscFunctionReturn(0);
2001 }
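
/*
   Usage sketch: both forms below reach this routine; the in-place form (reuse
   with *matout == A) requires a square matrix, as checked above.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/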
2002 
2003 #undef __FUNCT__
2004 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2005 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2006 {
2007   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2008   Mat            a    = aij->A,b = aij->B;
2009   PetscErrorCode ierr;
2010   PetscInt       s1,s2,s3;
2011 
2012   PetscFunctionBegin;
2013   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2014   if (rr) {
2015     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2016     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2017     /* Overlap communication with computation. */
2018     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2019   }
2020   if (ll) {
2021     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2022     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2023     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2024   }
2025   /* scale the diagonal block */
2026   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2027 
2028   if (rr) {
2029     /* Do a scatter end and then right scale the off-diagonal block */
2030     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2031     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2032   }
2033   PetscFunctionReturn(0);
2034 }
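
/*
   Usage sketch: computes mat = diag(ll)*mat*diag(rr); ll conforms to the row
   layout, rr to the column layout, and either may be NULL to skip that side.

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,NULL,r);CHKERRQ(ierr);
*/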
2035 
2036 #undef __FUNCT__
2037 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2038 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2039 {
2040   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2041   PetscErrorCode ierr;
2042 
2043   PetscFunctionBegin;
2044   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2045   PetscFunctionReturn(0);
2046 }
2047 
2048 #undef __FUNCT__
2049 #define __FUNCT__ "MatEqual_MPIAIJ"
2050 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2051 {
2052   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2053   Mat            a,b,c,d;
2054   PetscBool      flg;
2055   PetscErrorCode ierr;
2056 
2057   PetscFunctionBegin;
2058   a = matA->A; b = matA->B;
2059   c = matB->A; d = matB->B;
2060 
2061   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2062   if (flg) {
2063     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2064   }
2065   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2066   PetscFunctionReturn(0);
2067 }
2068 
2069 #undef __FUNCT__
2070 #define __FUNCT__ "MatCopy_MPIAIJ"
2071 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2072 {
2073   PetscErrorCode ierr;
2074   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2075   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2076 
2077   PetscFunctionBegin;
2078   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2079   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2080     /* because of the column compression in the off-processor part of the matrix a->B,
2081        the number of columns in a->B and b->B may be different, hence we cannot call
2082        the MatCopy() directly on the two parts. If need be, we can provide a more
2083        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2084        then copying the submatrices */
2085     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2086   } else {
2087     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2088     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2089   }
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 #undef __FUNCT__
2094 #define __FUNCT__ "MatSetUp_MPIAIJ"
2095 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2096 {
2097   PetscErrorCode ierr;
2098 
2099   PetscFunctionBegin;
2100   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2101   PetscFunctionReturn(0);
2102 }
2103 
2104 /*
2105    Computes the number of nonzeros per row needed for preallocation when X and Y
2106    have different nonzero structure.
2107 */
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2110 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2111 {
2112   PetscInt       i,j,k,nzx,nzy;
2113 
2114   PetscFunctionBegin;
2115   /* Set the number of nonzeros in the new matrix */
2116   for (i=0; i<m; i++) {
2117     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2118     nzx = xi[i+1] - xi[i];
2119     nzy = yi[i+1] - yi[i];
2120     nnz[i] = 0;
2121     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2122       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2123       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2124       nnz[i]++;
2125     }
2126     for (; k<nzy; k++) nnz[i]++;
2127   }
2128   PetscFunctionReturn(0);
2129 }
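
/*
   Worked example of the merge count above: if in row i the global columns of X
   are {0,3,5} and those of Y are {1,3,7}, the merged pattern is {0,1,3,5,7} and
   nnz[i] = 5; the shared column 3 is counted only once via the duplicate skip.
*/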
2130 
2131 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2132 #undef __FUNCT__
2133 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2134 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2135 {
2136   PetscErrorCode ierr;
2137   PetscInt       m = Y->rmap->N;
2138   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2139   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2140 
2141   PetscFunctionBegin;
2142   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 #undef __FUNCT__
2147 #define __FUNCT__ "MatAXPY_MPIAIJ"
2148 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2149 {
2150   PetscErrorCode ierr;
2151   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2152   PetscBLASInt   bnz,one=1;
2153   Mat_SeqAIJ     *x,*y;
2154 
2155   PetscFunctionBegin;
2156   if (str == SAME_NONZERO_PATTERN) {
2157     PetscScalar alpha = a;
2158     x    = (Mat_SeqAIJ*)xx->A->data;
2159     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2160     y    = (Mat_SeqAIJ*)yy->A->data;
2161     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2162     x    = (Mat_SeqAIJ*)xx->B->data;
2163     y    = (Mat_SeqAIJ*)yy->B->data;
2164     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2165     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2166     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2167   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzero pattern of X is a subset of Y's */
2168     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2169   } else {
2170     Mat      B;
2171     PetscInt *nnz_d,*nnz_o;
2172     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2173     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2174     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2175     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2176     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2177     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2178     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2179     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2180     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2181     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2182     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2183     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2184     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2185     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2186   }
2187   PetscFunctionReturn(0);
2188 }
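
/*
   Usage sketch: Y = Y + a*X.  SAME_NONZERO_PATTERN takes the fast BLASaxpy
   path above, while DIFFERENT_NONZERO_PATTERN builds a new matrix with the
   merged preallocation computed by the helpers above.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/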
2189 
2190 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2191 
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatConjugate_MPIAIJ"
2194 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2195 {
2196 #if defined(PETSC_USE_COMPLEX)
2197   PetscErrorCode ierr;
2198   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2199 
2200   PetscFunctionBegin;
2201   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2202   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2203 #else
2204   PetscFunctionBegin;
2205 #endif
2206   PetscFunctionReturn(0);
2207 }
2208 
2209 #undef __FUNCT__
2210 #define __FUNCT__ "MatRealPart_MPIAIJ"
2211 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2212 {
2213   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2214   PetscErrorCode ierr;
2215 
2216   PetscFunctionBegin;
2217   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2218   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2219   PetscFunctionReturn(0);
2220 }
2221 
2222 #undef __FUNCT__
2223 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2224 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2225 {
2226   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2227   PetscErrorCode ierr;
2228 
2229   PetscFunctionBegin;
2230   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2231   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2232   PetscFunctionReturn(0);
2233 }
2234 
2235 #if defined(PETSC_HAVE_PBGL)
2236 
2237 #include <boost/parallel/mpi/bsp_process_group.hpp>
2238 #include <boost/graph/distributed/ilu_default_graph.hpp>
2239 #include <boost/graph/distributed/ilu_0_block.hpp>
2240 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2241 #include <boost/graph/distributed/petsc/interface.hpp>
2242 #include <boost/multi_array.hpp>
2243 #include <boost/parallel/distributed_property_map.hpp>
2244 
2245 #undef __FUNCT__
2246 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2247 /*
2248   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2249 */
2250 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2251 {
2252   namespace petsc = boost::distributed::petsc;
2253 
2254   namespace graph_dist = boost::graph::distributed;
2255   using boost::graph::distributed::ilu_default::process_group_type;
2256   using boost::graph::ilu_permuted;
2257 
2258   PetscBool      row_identity, col_identity;
2259   PetscContainer c;
2260   PetscInt       m, n, M, N;
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2265   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2266   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2267   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2268 
2269   process_group_type pg;
2270   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2271   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2272   lgraph_type& level_graph = *lgraph_p;
2273   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2274 
2275   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2276   ilu_permuted(level_graph);
2277 
2278   /* put together the new matrix */
2279   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2280   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2281   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2282   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2283   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2284   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2285   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2286   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2287 
2288   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2289   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2290   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2291   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 #undef __FUNCT__
2296 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2297 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2298 {
2299   PetscFunctionBegin;
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatSolve_MPIAIJ"
2305 /*
2306   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2307 */
2308 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2309 {
2310   namespace graph_dist = boost::graph::distributed;
2311 
2312   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2313   lgraph_type    *lgraph_p;
2314   PetscContainer c;
2315   PetscErrorCode ierr;
2316 
2317   PetscFunctionBegin;
2318   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2319   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2320   ierr = VecCopy(b, x);CHKERRQ(ierr);
2321 
2322   PetscScalar *array_x;
2323   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2324   PetscInt sx;
2325   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2326 
2327   PetscScalar *array_b;
2328   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2329   PetscInt sb;
2330   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2331 
2332   lgraph_type& level_graph = *lgraph_p;
2333   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2334 
2335   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2336   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2337   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2338 
2339   typedef boost::iterator_property_map<array_ref_type::iterator,
2340                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2341   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2342   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2343 
2344   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2345   PetscFunctionReturn(0);
2346 }
2347 #endif
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2351 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2352 {
2353   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2354   PetscErrorCode ierr;
2355   PetscInt       i,*idxb = 0;
2356   PetscScalar    *va,*vb;
2357   Vec            vtmp;
2358 
2359   PetscFunctionBegin;
2360   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2361   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2362   if (idx) {
2363     for (i=0; i<A->rmap->n; i++) {
2364       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2365     }
2366   }
2367 
2368   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2369   if (idx) {
2370     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2371   }
2372   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2373   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2374 
2375   for (i=0; i<A->rmap->n; i++) {
2376     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2377       va[i] = vb[i];
2378       if (idx) idx[i] = a->garray[idxb[i]];
2379     }
2380   }
2381 
2382   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2383   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2384   ierr = PetscFree(idxb);CHKERRQ(ierr);
2385   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2386   PetscFunctionReturn(0);
2387 }
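
/*
   Usage sketch: v must conform to the row layout of A (a "left" vector), and
   idx may be NULL when the maximizing column indices are not needed.

     Vec v;
     ierr = MatCreateVecs(A,NULL,&v);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,NULL);CHKERRQ(ierr);
     ierr = VecDestroy(&v);CHKERRQ(ierr);
*/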
2388 
2389 #undef __FUNCT__
2390 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2391 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2392 {
2393   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2394   PetscErrorCode ierr;
2395   PetscInt       i,*idxb = 0;
2396   PetscScalar    *va,*vb;
2397   Vec            vtmp;
2398 
2399   PetscFunctionBegin;
2400   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2401   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2402   if (idx) {
2403     for (i=0; i<A->rmap->n; i++) {
2404       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2405     }
2406   }
2407 
2408   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2409   if (idx) {
2410     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2411   }
2412   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2413   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2414 
2415   for (i=0; i<A->rmap->n; i++) {
2416     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2417       va[i] = vb[i];
2418       if (idx) idx[i] = a->garray[idxb[i]];
2419     }
2420   }
2421 
2422   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2423   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2424   ierr = PetscFree(idxb);CHKERRQ(ierr);
2425   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2426   PetscFunctionReturn(0);
2427 }
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2431 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2432 {
2433   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2434   PetscInt       n      = A->rmap->n;
2435   PetscInt       cstart = A->cmap->rstart;
2436   PetscInt       *cmap  = mat->garray;
2437   PetscInt       *diagIdx, *offdiagIdx;
2438   Vec            diagV, offdiagV;
2439   PetscScalar    *a, *diagA, *offdiagA;
2440   PetscInt       r;
2441   PetscErrorCode ierr;
2442 
2443   PetscFunctionBegin;
2444   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2445   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2446   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2447   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2448   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2449   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2450   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2451   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2452   for (r = 0; r < n; ++r) {
2453     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2454       a[r]   = diagA[r];
2455       idx[r] = cstart + diagIdx[r];
2456     } else {
2457       a[r]   = offdiagA[r];
2458       idx[r] = cmap[offdiagIdx[r]];
2459     }
2460   }
2461   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2462   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2463   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2464   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2465   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2466   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 #undef __FUNCT__
2471 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2472 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2473 {
2474   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2475   PetscInt       n      = A->rmap->n;
2476   PetscInt       cstart = A->cmap->rstart;
2477   PetscInt       *cmap  = mat->garray;
2478   PetscInt       *diagIdx, *offdiagIdx;
2479   Vec            diagV, offdiagV;
2480   PetscScalar    *a, *diagA, *offdiagA;
2481   PetscInt       r;
2482   PetscErrorCode ierr;
2483 
2484   PetscFunctionBegin;
2485   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2486   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2487   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2488   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2489   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2490   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2491   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2492   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2493   for (r = 0; r < n; ++r) {
2494     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2495       a[r]   = diagA[r];
2496       idx[r] = cstart + diagIdx[r];
2497     } else {
2498       a[r]   = offdiagA[r];
2499       idx[r] = cmap[offdiagIdx[r]];
2500     }
2501   }
2502   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2503   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2504   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2505   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2506   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2507   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2508   PetscFunctionReturn(0);
2509 }
2510 
2511 #undef __FUNCT__
2512 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2513 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2514 {
2515   PetscErrorCode ierr;
2516   Mat            *dummy;
2517 
2518   PetscFunctionBegin;
2519   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2520   *newmat = *dummy;
2521   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2522   PetscFunctionReturn(0);
2523 }
2524 
2525 #undef __FUNCT__
2526 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2527 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2528 {
2529   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2530   PetscErrorCode ierr;
2531 
2532   PetscFunctionBegin;
2533   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 #undef __FUNCT__
2538 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2539 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2540 {
2541   PetscErrorCode ierr;
2542   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2543 
2544   PetscFunctionBegin;
2545   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2546   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2547   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2548   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 /* -------------------------------------------------------------------*/
2553 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2554                                        MatGetRow_MPIAIJ,
2555                                        MatRestoreRow_MPIAIJ,
2556                                        MatMult_MPIAIJ,
2557                                 /* 4*/ MatMultAdd_MPIAIJ,
2558                                        MatMultTranspose_MPIAIJ,
2559                                        MatMultTransposeAdd_MPIAIJ,
2560 #if defined(PETSC_HAVE_PBGL)
2561                                        MatSolve_MPIAIJ,
2562 #else
2563                                        0,
2564 #endif
2565                                        0,
2566                                        0,
2567                                 /*10*/ 0,
2568                                        0,
2569                                        0,
2570                                        MatSOR_MPIAIJ,
2571                                        MatTranspose_MPIAIJ,
2572                                 /*15*/ MatGetInfo_MPIAIJ,
2573                                        MatEqual_MPIAIJ,
2574                                        MatGetDiagonal_MPIAIJ,
2575                                        MatDiagonalScale_MPIAIJ,
2576                                        MatNorm_MPIAIJ,
2577                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2578                                        MatAssemblyEnd_MPIAIJ,
2579                                        MatSetOption_MPIAIJ,
2580                                        MatZeroEntries_MPIAIJ,
2581                                 /*24*/ MatZeroRows_MPIAIJ,
2582                                        0,
2583 #if defined(PETSC_HAVE_PBGL)
2584                                        0,
2585 #else
2586                                        0,
2587 #endif
2588                                        0,
2589                                        0,
2590                                 /*29*/ MatSetUp_MPIAIJ,
2591 #if defined(PETSC_HAVE_PBGL)
2592                                        0,
2593 #else
2594                                        0,
2595 #endif
2596                                        0,
2597                                        0,
2598                                        0,
2599                                 /*34*/ MatDuplicate_MPIAIJ,
2600                                        0,
2601                                        0,
2602                                        0,
2603                                        0,
2604                                 /*39*/ MatAXPY_MPIAIJ,
2605                                        MatGetSubMatrices_MPIAIJ,
2606                                        MatIncreaseOverlap_MPIAIJ,
2607                                        MatGetValues_MPIAIJ,
2608                                        MatCopy_MPIAIJ,
2609                                 /*44*/ MatGetRowMax_MPIAIJ,
2610                                        MatScale_MPIAIJ,
2611                                        0,
2612                                        MatDiagonalSet_MPIAIJ,
2613                                        MatZeroRowsColumns_MPIAIJ,
2614                                 /*49*/ MatSetRandom_MPIAIJ,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                        0,
2619                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2620                                        0,
2621                                        MatSetUnfactored_MPIAIJ,
2622                                        MatPermute_MPIAIJ,
2623                                        0,
2624                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2625                                        MatDestroy_MPIAIJ,
2626                                        MatView_MPIAIJ,
2627                                        0,
2628                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2629                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2630                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2631                                        0,
2632                                        0,
2633                                        0,
2634                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2635                                        MatGetRowMinAbs_MPIAIJ,
2636                                        0,
2637                                        MatSetColoring_MPIAIJ,
2638                                        0,
2639                                        MatSetValuesAdifor_MPIAIJ,
2640                                 /*75*/ MatFDColoringApply_AIJ,
2641                                        0,
2642                                        0,
2643                                        0,
2644                                        MatFindZeroDiagonals_MPIAIJ,
2645                                 /*80*/ 0,
2646                                        0,
2647                                        0,
2648                                 /*83*/ MatLoad_MPIAIJ,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2655                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2656                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2657                                        MatPtAP_MPIAIJ_MPIAIJ,
2658                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2659                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                 /*99*/ 0,
2665                                        0,
2666                                        0,
2667                                        MatConjugate_MPIAIJ,
2668                                        0,
2669                                 /*104*/MatSetValuesRow_MPIAIJ,
2670                                        MatRealPart_MPIAIJ,
2671                                        MatImaginaryPart_MPIAIJ,
2672                                        0,
2673                                        0,
2674                                 /*109*/0,
2675                                        0,
2676                                        MatGetRowMin_MPIAIJ,
2677                                        0,
2678                                        0,
2679                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                 /*119*/0,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                        MatGetMultiProcBlock_MPIAIJ,
2689                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2690                                        MatGetColumnNorms_MPIAIJ,
2691                                        MatInvertBlockDiagonal_MPIAIJ,
2692                                        0,
2693                                        MatGetSubMatricesParallel_MPIAIJ,
2694                                 /*129*/0,
2695                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2696                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2697                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2698                                        0,
2699                                 /*134*/0,
2700                                        0,
2701                                        0,
2702                                        0,
2703                                        0,
2704                                 /*139*/0,
2705                                        0,
2706                                        0,
2707                                        MatFDColoringSetUp_MPIXAIJ,
2708                                        0,
2709                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2710 };
2711 
2712 /* ----------------------------------------------------------------------------------------*/
2713 
2714 #undef __FUNCT__
2715 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2716 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2717 {
2718   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2719   PetscErrorCode ierr;
2720 
2721   PetscFunctionBegin;
2722   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2723   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2724   PetscFunctionReturn(0);
2725 }
2726 
2727 #undef __FUNCT__
2728 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2729 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2730 {
2731   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2732   PetscErrorCode ierr;
2733 
2734   PetscFunctionBegin;
2735   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2736   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2737   PetscFunctionReturn(0);
2738 }
2739 
2740 #undef __FUNCT__
2741 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2742 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2743 {
2744   Mat_MPIAIJ     *b;
2745   PetscErrorCode ierr;
2746 
2747   PetscFunctionBegin;
2748   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2749   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2750   b = (Mat_MPIAIJ*)B->data;
2751 
2752   if (!B->preallocated) {
2753     /* Explicitly create 2 MATSEQAIJ matrices. */
2754     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2755     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2756     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2757     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2758     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2759     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2760     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2761     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2762     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2763     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2764   }
2765 
2766   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2767   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2768   B->preallocated = PETSC_TRUE;
2769   PetscFunctionReturn(0);
2770 }
2771 
2772 #undef __FUNCT__
2773 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2774 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2775 {
2776   Mat            mat;
2777   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2778   PetscErrorCode ierr;
2779 
2780   PetscFunctionBegin;
2781   *newmat = 0;
2782   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2783   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2784   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2785   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2786   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2787   a       = (Mat_MPIAIJ*)mat->data;
2788 
2789   mat->factortype   = matin->factortype;
2790   mat->assembled    = PETSC_TRUE;
2791   mat->insertmode   = NOT_SET_VALUES;
2792   mat->preallocated = PETSC_TRUE;
2793 
2794   a->size         = oldmat->size;
2795   a->rank         = oldmat->rank;
2796   a->donotstash   = oldmat->donotstash;
2797   a->roworiented  = oldmat->roworiented;
2798   a->rowindices   = 0;
2799   a->rowvalues    = 0;
2800   a->getrowactive = PETSC_FALSE;
2801 
2802   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2803   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2804 
2805   if (oldmat->colmap) {
2806 #if defined(PETSC_USE_CTABLE)
2807     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2808 #else
2809     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2810     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2811     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2812 #endif
2813   } else a->colmap = 0;
2814   if (oldmat->garray) {
2815     PetscInt len;
2816     len  = oldmat->B->cmap->n;
2817     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2818     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2819     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2820   } else a->garray = 0;
2821 
2822   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2823   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2824   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2825   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2826   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2827   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2828   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2829   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2830   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2831   *newmat = mat;
2832   PetscFunctionReturn(0);
2833 }
2834 
2835 
2836 
2837 #undef __FUNCT__
2838 #define __FUNCT__ "MatLoad_MPIAIJ"
2839 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2840 {
2841   PetscScalar    *vals,*svals;
2842   MPI_Comm       comm;
2843   PetscErrorCode ierr;
2844   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2845   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2846   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2847   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2848   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2849   int            fd;
2850   PetscInt       bs = newMat->rmap->bs;
2851 
2852   PetscFunctionBegin;
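  /* Overall scheme: process 0 reads the header, row lengths, column indices, and
     numerical values from the binary file, keeps its own portion, and ships each
     remaining portion to the process that owns those rows; every process then
     preallocates and inserts its local rows */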
2853   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2854   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2855   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2856   if (!rank) {
2857     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2858     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2859     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2860   }
2861 
2862   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2863   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2864   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2865   if (bs < 0) bs = 1;
2866 
2867   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2868 
2869   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2870   M    = header[1]; N = header[2];
2871   /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
2872   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2873   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2874 
2875   /* If global sizes are set, check if they are consistent with that given in the file */
2876   if (sizesset) {
2877     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2878   }
2879   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2880   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2881 
2882   /* determine ownership of all (block) rows */
2883   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2884   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2885   else m = newMat->rmap->n; /* Set by user */
2886 
2887   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2888   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2889 
2890   /* First process needs enough room for process with most rows */
2891   if (!rank) {
2892     mmax = rowners[1];
2893     for (i=2; i<=size; i++) {
2894       mmax = PetscMax(mmax, rowners[i]);
2895     }
2896   } else mmax = -1;             /* unused, but compilers complain */
2897 
2898   rowners[0] = 0;
2899   for (i=2; i<=size; i++) {
2900     rowners[i] += rowners[i-1];
2901   }
2902   rstart = rowners[rank];
2903   rend   = rowners[rank+1];
2904 
2905   /* distribute row lengths to all processors */
2906   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2907   if (!rank) {
2908     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2909     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2910     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2911     for (j=0; j<m; j++) {
2912       procsnz[0] += ourlens[j];
2913     }
2914     for (i=1; i<size; i++) {
2915       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2916       /* calculate the number of nonzeros on each processor */
2917       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2918         procsnz[i] += rowlengths[j];
2919       }
2920       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2921     }
2922     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2923   } else {
2924     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2925   }
2926 
2927   if (!rank) {
2928     /* determine max buffer needed and allocate it */
2929     maxnz = 0;
2930     for (i=0; i<size; i++) {
2931       maxnz = PetscMax(maxnz,procsnz[i]);
2932     }
2933     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2934 
2935     /* read in my part of the matrix column indices  */
2936     nz   = procsnz[0];
2937     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2938     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2939 
2940     /* read in everyone else's portion and ship it off */
2941     for (i=1; i<size; i++) {
2942       nz   = procsnz[i];
2943       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2944       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2945     }
2946     ierr = PetscFree(cols);CHKERRQ(ierr);
2947   } else {
2948     /* determine buffer space needed for message */
2949     nz = 0;
2950     for (i=0; i<m; i++) {
2951       nz += ourlens[i];
2952     }
2953     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2954 
2955     /* receive message of column indices */
2956     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2957   }
2958 
2959   /* determine column ownership if matrix is not square */
2960   if (N != M) {
2961     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2962     else n = newMat->cmap->n;
2963     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2964     cstart = cend - n;
2965   } else {
2966     cstart = rstart;
2967     cend   = rend;
2968     n      = cend - cstart;
2969   }
2970 
2971   /* loop over local rows, determining the number of off-diagonal entries */
2972   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2973   jj   = 0;
2974   for (i=0; i<m; i++) {
2975     for (j=0; j<ourlens[i]; j++) {
2976       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2977       jj++;
2978     }
2979   }
2980 
2981   for (i=0; i<m; i++) {
2982     ourlens[i] -= offlens[i];
2983   }
2984   if (!sizesset) {
2985     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2986   }
2987 
2988   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2989 
2990   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2991 
2992   for (i=0; i<m; i++) {
2993     ourlens[i] += offlens[i];
2994   }
2995 
2996   if (!rank) {
2997     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2998 
2999     /* read in my part of the matrix numerical values  */
3000     nz   = procsnz[0];
3001     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3002 
3003     /* insert into matrix */
3004     jj      = rstart;
3005     smycols = mycols;
3006     svals   = vals;
3007     for (i=0; i<m; i++) {
3008       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3009       smycols += ourlens[i];
3010       svals   += ourlens[i];
3011       jj++;
3012     }
3013 
3014     /* read in other processors and ship out */
3015     for (i=1; i<size; i++) {
3016       nz   = procsnz[i];
3017       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3018       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3019     }
3020     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3021   } else {
3022     /* receive numeric values */
3023     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3024 
3025     /* receive message of values */
3026     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3027 
3028     /* insert into matrix */
3029     jj      = rstart;
3030     smycols = mycols;
3031     svals   = vals;
3032     for (i=0; i<m; i++) {
3033       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3034       smycols += ourlens[i];
3035       svals   += ourlens[i];
3036       jj++;
3037     }
3038   }
3039   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3040   ierr = PetscFree(vals);CHKERRQ(ierr);
3041   ierr = PetscFree(mycols);CHKERRQ(ierr);
3042   ierr = PetscFree(rowners);CHKERRQ(ierr);
3043   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3044   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3045   PetscFunctionReturn(0);
3046 }
3047 
3048 #undef __FUNCT__
3049 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3050 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3051 {
3052   PetscErrorCode ierr;
3053   IS             iscol_local;
3054   PetscInt       csize;
3055 
3056   PetscFunctionBegin;
3057   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3058   if (call == MAT_REUSE_MATRIX) {
3059     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3060     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3061   } else {
3062     PetscInt cbs;
3063     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3064     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3065     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3066   }
3067   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3068   if (call == MAT_INITIAL_MATRIX) {
3069     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3070     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3071   }
3072   PetscFunctionReturn(0);
3073 }
3074 
3075 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3076 #undef __FUNCT__
3077 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3078 /*
3079     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3080   piece on each process, and then the final result assembled by concatenating
3081   those local pieces. Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
3082 
3083   Note: This requires a sequential iscol containing all of the column indices.
3084 */
3085 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3086 {
3087   PetscErrorCode ierr;
3088   PetscMPIInt    rank,size;
3089   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3090   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3091   PetscBool      allcolumns, colflag;
3092   Mat            M,Mreuse;
3093   MatScalar      *vwork,*aa;
3094   MPI_Comm       comm;
3095   Mat_SeqAIJ     *aij;
3096 
3097   PetscFunctionBegin;
3098   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3099   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3100   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3101 
3102   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3103   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3104   if (colflag && ncol == mat->cmap->N) {
3105     allcolumns = PETSC_TRUE;
3106   } else {
3107     allcolumns = PETSC_FALSE;
3108   }
3109   if (call ==  MAT_REUSE_MATRIX) {
3110     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3111     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3112     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3113   } else {
3114     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3115   }
3116 
3117   /*
3118       m - number of local rows
3119       n - number of columns (same on all processors)
3120       rstart - first row in new global matrix generated
3121   */
3122   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3123   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3124   if (call == MAT_INITIAL_MATRIX) {
3125     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3126     ii  = aij->i;
3127     jj  = aij->j;
3128 
3129     /*
3130         Determine the number of non-zeros in the diagonal and off-diagonal
3131         portions of the matrix in order to do correct preallocation
3132     */
3133 
3134     /* first get start and end of "diagonal" columns */
3135     if (csize == PETSC_DECIDE) {
3136       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3137       if (mglobal == n) { /* square matrix */
3138         nlocal = m;
3139       } else {
3140         nlocal = n/size + ((n % size) > rank);
3141       }
3142     } else {
3143       nlocal = csize;
3144     }
3145     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3146     rstart = rend - nlocal;
3147     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3148 
3149     /* next, compute all the lengths */
3150     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3151     olens = dlens + m;
3152     for (i=0; i<m; i++) {
3153       jend = ii[i+1] - ii[i];
3154       olen = 0;
3155       dlen = 0;
3156       for (j=0; j<jend; j++) {
3157         if (*jj < rstart || *jj >= rend) olen++;
3158         else dlen++;
3159         jj++;
3160       }
3161       olens[i] = olen;
3162       dlens[i] = dlen;
3163     }
3164     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3165     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3166     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3167     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3168     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3169     ierr = PetscFree(dlens);CHKERRQ(ierr);
3170   } else {
3171     PetscInt ml,nl;
3172 
3173     M    = *newmat;
3174     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3175     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3176     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3177     /*
3178          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3179        rather than the slower MatSetValues().
3180     */
3181     M->was_assembled = PETSC_TRUE;
3182     M->assembled     = PETSC_FALSE;
3183   }
3184   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3185   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3186   ii   = aij->i;
3187   jj   = aij->j;
3188   aa   = aij->a;
3189   for (i=0; i<m; i++) {
3190     row   = rstart + i;
3191     nz    = ii[i+1] - ii[i];
3192     cwork = jj;     jj += nz;
3193     vwork = aa;     aa += nz;
3194     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3195   }
3196 
3197   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3198   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3199   *newmat = M;
3200 
3201   /* save submatrix used in processor for next request */
3202   if (call ==  MAT_INITIAL_MATRIX) {
3203     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3204     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3205   }
3206   PetscFunctionReturn(0);
3207 }
3208 
3209 #undef __FUNCT__
3210 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3211 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3212 {
3213   PetscInt       m,cstart, cend,j,nnz,i,d;
3214   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3215   const PetscInt *JJ;
3216   PetscScalar    *values;
3217   PetscErrorCode ierr;
3218 
3219   PetscFunctionBegin;
3220   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3221 
3222   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3223   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3224   m      = B->rmap->n;
3225   cstart = B->cmap->rstart;
3226   cend   = B->cmap->rend;
3227   rstart = B->rmap->rstart;
3228 
3229   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3230 
3231 #if defined(PETSC_USE_DEBUG)
3232   for (i=0; i<m; i++) {
3233     nnz = Ii[i+1]- Ii[i];
3234     JJ  = J + Ii[i];
3235     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3236     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3237     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3238   }
3239 #endif
3240 
3241   for (i=0; i<m; i++) {
3242     nnz     = Ii[i+1]- Ii[i];
3243     JJ      = J + Ii[i];
3244     nnz_max = PetscMax(nnz_max,nnz);
3245     d       = 0;
3246     for (j=0; j<nnz; j++) {
3247       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3248     }
3249     d_nnz[i] = d;
3250     o_nnz[i] = nnz - d;
3251   }
3252   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3253   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3254 
3255   if (v) values = (PetscScalar*)v;
3256   else {
3257     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3258   }
3259 
3260   for (i=0; i<m; i++) {
3261     ii   = i + rstart;
3262     nnz  = Ii[i+1]- Ii[i];
3263     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3264   }
3265   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3266   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3267 
3268   if (!v) {
3269     ierr = PetscFree(values);CHKERRQ(ierr);
3270   }
3271   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3272   PetscFunctionReturn(0);
3273 }
3274 
3275 #undef __FUNCT__
3276 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3277 /*@
3278    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3279    (the default parallel PETSc format).
3280 
3281    Collective on MPI_Comm
3282 
3283    Input Parameters:
3284 +  B - the matrix
3285 .  i - the indices into j for the start of each local row (starts with zero)
3286 .  j - the column indices for each local row (starts with zero)
3287 -  v - optional values in the matrix
3288 
3289    Level: developer
3290 
3291    Notes:
3292        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3293      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3294      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3295 
3296        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3297 
3298        The format used for the sparse matrix input is equivalent to a
3299     row-major ordering, i.e., for the following matrix, the expected input
3300     data is as shown:
3301 
3302         1 0 0
3303         2 0 3     P0
3304        -------
3305         4 5 6     P1
3306 
3307      Process0 [P0]: rows_owned=[0,1]
3308         i =  {0,1,3}  [size = nrow+1  = 2+1]
3309         j =  {0,0,2}  [size = nz = 3]
3310         v =  {1,2,3}  [size = nz = 3]
3311 
3312      Process1 [P1]: rows_owned=[2]
3313         i =  {0,3}    [size = nrow+1  = 1+1]
3314         j =  {0,1,2}  [size = nz = 3]
3315         v =  {4,5,6}  [size = nz = 3]
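
   As an illustrative sketch (not one of the PETSc examples; error checking
   omitted), process 0 above would make the following calls; process 1 makes
   the same calls with its own i = {0,3}, j = {0,1,2}, v = {4,5,6} and one
   local row:

.vb
     Mat         B;
     PetscInt    i[3] = {0,1,3}, j[3] = {0,0,2};
     PetscScalar v[3] = {1,2,3};

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve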
3316 
3317 .keywords: matrix, aij, compressed row, sparse, parallel
3318 
3319 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3320           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3321 @*/
3322 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3323 {
3324   PetscErrorCode ierr;
3325 
3326   PetscFunctionBegin;
3327   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3328   PetscFunctionReturn(0);
3329 }
3330 
3331 #undef __FUNCT__
3332 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3333 /*@C
3334    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3335    (the default parallel PETSc format).  For good matrix assembly performance
3336    the user should preallocate the matrix storage by setting the parameters
3337    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3338    performance can be increased by more than a factor of 50.
3339 
3340    Collective on MPI_Comm
3341 
3342    Input Parameters:
3343 +  B - the matrix
3344 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3345            (same value is used for all local rows)
3346 .  d_nnz - array containing the number of nonzeros in the various rows of the
3347            DIAGONAL portion of the local submatrix (possibly different for each row)
3348            or NULL, if d_nz is used to specify the nonzero structure.
3349            The size of this array is equal to the number of local rows, i.e 'm'.
3350            For matrices that will be factored, you must leave room for (and set)
3351            the diagonal entry even if it is zero.
3352 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3353            submatrix (same value is used for all local rows).
3354 -  o_nnz - array containing the number of nonzeros in the various rows of the
3355            OFF-DIAGONAL portion of the local submatrix (possibly different for
3356            each row) or NULL, if o_nz is used to specify the nonzero
3357            structure. The size of this array is equal to the number
3358            of local rows, i.e 'm'.
3359 
3360    If the *_nnz parameter is given then the *_nz parameter is ignored
3361 
3362    The AIJ format (also called the Yale sparse matrix format or
3363    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3364    storage.  The stored row and column indices begin with zero.
3365    See Users-Manual: ch_mat for details.
3366 
3367    The parallel matrix is partitioned such that the first m0 rows belong to
3368    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3369    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3370 
3371    The DIAGONAL portion of the local submatrix of a processor can be defined
3372    as the submatrix obtained by extracting the part corresponding to
3373    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3374    first row that belongs to the processor, r2 is the last row belonging to
3375    this processor, and c1-c2 is the range of indices of the local part of a
3376    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3377    common case of a square matrix, the row and column ranges are the same and
3378    the DIAGONAL part is also square. The remaining portion of the local
3379    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3380 
3381    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3382 
3383    You can call MatGetInfo() to get information on how effective the preallocation was;
3384    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3385    You can also run with the option -info and look for messages with the string
3386    malloc in them to see if additional memory allocation was needed.
3387 
3388    Example usage:
3389 
3390    Consider the following 8x8 matrix with 34 non-zero values that is
3391    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3392    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3393    as follows:
3394 
3395 .vb
3396             1  2  0  |  0  3  0  |  0  4
3397     Proc0   0  5  6  |  7  0  0  |  8  0
3398             9  0 10  | 11  0  0  | 12  0
3399     -------------------------------------
3400            13  0 14  | 15 16 17  |  0  0
3401     Proc1   0 18  0  | 19 20 21  |  0  0
3402             0  0  0  | 22 23  0  | 24  0
3403     -------------------------------------
3404     Proc2  25 26 27  |  0  0 28  | 29  0
3405            30  0  0  | 31 32 33  |  0 34
3406 .ve
3407 
3408    This can be represented as a collection of submatrices as:
3409 
3410 .vb
3411       A B C
3412       D E F
3413       G H I
3414 .ve
3415 
3416    Where the submatrices A,B,C are owned by proc0, D,E,F are
3417    owned by proc1, G,H,I are owned by proc2.
3418 
3419    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3420    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3421    The 'M','N' parameters are 8,8, and have the same values on all procs.
3422 
3423    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3424    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3425    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3426    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3427    part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
3428    matrix, and [DF] as another SeqAIJ matrix.
3429 
3430    When d_nz, o_nz parameters are specified, d_nz storage elements are
3431    allocated for every row of the local diagonal submatrix, and o_nz
3432    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3433    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3434    per local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3435    In this case, the values of d_nz,o_nz are:
3436 .vb
3437      proc0 : dnz = 2, o_nz = 2
3438      proc1 : dnz = 3, o_nz = 2
3439      proc2 : dnz = 1, o_nz = 4
3440 .ve
3441    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3442    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3443    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3444    34 values.
3445 
3446    When d_nnz, o_nnz parameters are specified, the storage is specified
3447    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3448    In the above case the values for d_nnz,o_nnz are:
3449 .vb
3450      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3451      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3452      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3453 .ve
3454    Here the space allocated is the sum of all the above values, i.e., 34, and
3455    hence the preallocation is perfect.
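
   As an illustrative sketch (not one of the PETSc examples), proc1 above
   could preallocate its three local rows as follows, before inserting its
   entries with MatSetValues() and assembling with MatAssemblyBegin()/MatAssemblyEnd():

.vb
     Mat      A;
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve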
3456 
3457    Level: intermediate
3458 
3459 .keywords: matrix, aij, compressed row, sparse, parallel
3460 
3461 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3462           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3463 @*/
3464 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3465 {
3466   PetscErrorCode ierr;
3467 
3468   PetscFunctionBegin;
3469   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3470   PetscValidType(B,1);
3471   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3472   PetscFunctionReturn(0);
3473 }
3474 
3475 #undef __FUNCT__
3476 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3477 /*@
3478      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3479          CSR format the local rows.
3480 
3481    Collective on MPI_Comm
3482 
3483    Input Parameters:
3484 +  comm - MPI communicator
3485 .  m - number of local rows (Cannot be PETSC_DECIDE)
3486 .  n - This value should be the same as the local size used in creating the
3487        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3488        calculated if N is given) For square matrices n is almost always m.
3489 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3490 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3491 .   i - row indices
3492 .   j - column indices
3493 -   a - matrix values
3494 
3495    Output Parameter:
3496 .   mat - the matrix
3497 
3498    Level: intermediate
3499 
3500    Notes:
3501        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3502      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3503      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3504 
3505        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3506 
3507        The format used for the sparse matrix input is equivalent to a
3508     row-major ordering, i.e., for the following matrix, the expected input
3509     data is as shown:
3510 
3511         1 0 0
3512         2 0 3     P0
3513        -------
3514         4 5 6     P1
3515 
3516      Process0 [P0]: rows_owned=[0,1]
3517         i =  {0,1,3}  [size = nrow+1  = 2+1]
3518         j =  {0,0,2}  [size = nz = 3]
3519         v =  {1,2,3}  [size = nz = 3]
3520 
3521      Process1 [P1]: rows_owned=[2]
3522         i =  {0,3}    [size = nrow+1  = 1+1]
3523         j =  {0,1,2}  [size = nz = 3]
3524         v =  {4,5,6}  [size = nz = 3]
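
   As an illustrative sketch (not one of the PETSc examples; error checking
   omitted), process 0 above would call

.vb
     Mat         A;
     PetscInt    i[3] = {0,1,3}, j[3] = {0,0,2};
     PetscScalar a[3] = {1,2,3};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&A);
.ve

   while process 1 passes its own i = {0,3}, j = {0,1,2}, a = {4,5,6} and
   local row count 1.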
3525 
3526 .keywords: matrix, aij, compressed row, sparse, parallel
3527 
3528 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3529           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3530 @*/
3531 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3532 {
3533   PetscErrorCode ierr;
3534 
3535   PetscFunctionBegin;
3536   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3537   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3538   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3539   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3540   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3541   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3542   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3543   PetscFunctionReturn(0);
3544 }
3545 
3546 #undef __FUNCT__
3547 #define __FUNCT__ "MatCreateAIJ"
3548 /*@C
3549    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3550    (the default parallel PETSc format).  For good matrix assembly performance
3551    the user should preallocate the matrix storage by setting the parameters
3552    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3553    performance can be increased by more than a factor of 50.
3554 
3555    Collective on MPI_Comm
3556 
3557    Input Parameters:
3558 +  comm - MPI communicator
3559 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3560            This value should be the same as the local size used in creating the
3561            y vector for the matrix-vector product y = Ax.
3562 .  n - This value should be the same as the local size used in creating the
3563        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3564        calculated if N is given) For square matrices n is almost always m.
3565 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3566 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3567 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3568            (same value is used for all local rows)
3569 .  d_nnz - array containing the number of nonzeros in the various rows of the
3570            DIAGONAL portion of the local submatrix (possibly different for each row)
3571            or NULL, if d_nz is used to specify the nonzero structure.
3572            The size of this array is equal to the number of local rows, i.e 'm'.
3573 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3574            submatrix (same value is used for all local rows).
3575 -  o_nnz - array containing the number of nonzeros in the various rows of the
3576            OFF-DIAGONAL portion of the local submatrix (possibly different for
3577            each row) or NULL, if o_nz is used to specify the nonzero
3578            structure. The size of this array is equal to the number
3579            of local rows, i.e 'm'.
3580 
3581    Output Parameter:
3582 .  A - the matrix
3583 
3584    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3585    MatXXXXSetPreallocation() paradigm instead of calling this routine directly.
3586    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3587 
3588    Notes:
3589    If the *_nnz parameter is given then the *_nz parameter is ignored
3590 
3591    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3592    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3593    storage requirements for this matrix.
3594 
3595    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3596    processor then it must be used on all processors that share the object for
3597    that argument.
3598 
3599    The user MUST specify either the local or global matrix dimensions
3600    (possibly both).
3601 
3602    The parallel matrix is partitioned across processors such that the
3603    first m0 rows belong to process 0, the next m1 rows belong to
3604    process 1, the next m2 rows belong to process 2, etc., where
3605    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
3606    values corresponding to [m x N] submatrix.
3607 
3608    The columns are logically partitioned with the n0 columns belonging
3609    to 0th partition, the next n1 columns belonging to the next
3610    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3611 
3612    The DIAGONAL portion of the local submatrix on any given processor
3613    is the submatrix corresponding to the rows and columns m,n
3614    owned by the given processor, i.e., the diagonal matrix on
3615    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3616    etc. The remaining portion of the local submatrix [m x (N-n)]
3617    constitute the OFF-DIAGONAL portion. The example below better
3618    illustrates this concept.
3619 
3620    For a square global matrix we define each processor's diagonal portion
3621    to be its local rows and the corresponding columns (a square submatrix);
3622    each processor's off-diagonal portion encompasses the remainder of the
3623    local matrix (a rectangular submatrix).
3624 
3625    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3626 
3627    When calling this routine with a single process communicator, a matrix of
3628    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3629    type of communicator, use the construction mechanism:
3630      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3631 
3632    By default, this format uses inodes (identical nodes) when possible.
3633    We search for consecutive rows with the same nonzero structure, thereby
3634    reusing matrix information to achieve increased efficiency.
3635 
3636    Options Database Keys:
3637 +  -mat_no_inode  - Do not use inodes
3638 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3639 -  -mat_aij_oneindex - Internally use indexing starting at 1
3640         rather than 0.  Note that when calling MatSetValues(),
3641         the user still MUST index entries starting at 0!
3642 
3643 
3644    Example usage:
3645 
3646    Consider the following 8x8 matrix with 34 non-zero values that is
3647    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3648    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3649    as follows:
3650 
3651 .vb
3652             1  2  0  |  0  3  0  |  0  4
3653     Proc0   0  5  6  |  7  0  0  |  8  0
3654             9  0 10  | 11  0  0  | 12  0
3655     -------------------------------------
3656            13  0 14  | 15 16 17  |  0  0
3657     Proc1   0 18  0  | 19 20 21  |  0  0
3658             0  0  0  | 22 23  0  | 24  0
3659     -------------------------------------
3660     Proc2  25 26 27  |  0  0 28  | 29  0
3661            30  0  0  | 31 32 33  |  0 34
3662 .ve
3663 
3664    This can be represented as a collection of submatrices as:
3665 
3666 .vb
3667       A B C
3668       D E F
3669       G H I
3670 .ve
3671 
3672    Where the submatrices A,B,C are owned by proc0, D,E,F are
3673    owned by proc1, G,H,I are owned by proc2.
3674 
3675    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3676    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3677    The 'M','N' parameters are 8,8, and have the same values on all procs.
3678 
3679    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3680    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3681    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3682    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3683    part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
3684    matrix, and [DF] as another SeqAIJ matrix.
3685 
3686    When d_nz, o_nz parameters are specified, d_nz storage elements are
3687    allocated for every row of the local diagonal submatrix, and o_nz
3688    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3689    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3690    per local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3691    In this case, the values of d_nz,o_nz are:
3692 .vb
3693      proc0 : dnz = 2, o_nz = 2
3694      proc1 : dnz = 3, o_nz = 2
3695      proc2 : dnz = 1, o_nz = 4
3696 .ve
3697    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3698    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3699    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3700    34 values.
3701 
3702    When d_nnz, o_nnz parameters are specified, the storage is specified
3703    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3704    In the above case the values for d_nnz,o_nnz are:
3705 .vb
3706      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3707      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3708      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3709 .ve
3710    Here the space allocated is the sum of all the above values, i.e., 34, and
3711    hence the preallocation is perfect.
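
   As an illustrative sketch (not one of the PETSc examples), the call made
   by proc1 above with its per-row counts would be

.vb
     Mat      A;
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve

   followed by MatSetValues() for the locally owned rows and the usual
   MatAssemblyBegin()/MatAssemblyEnd() pair.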
3712 
3713    Level: intermediate
3714 
3715 .keywords: matrix, aij, compressed row, sparse, parallel
3716 
3717 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3718           MPIAIJ, MatCreateMPIAIJWithArrays()
3719 @*/
3720 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3721 {
3722   PetscErrorCode ierr;
3723   PetscMPIInt    size;
3724 
3725   PetscFunctionBegin;
3726   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3727   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3728   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3729   if (size > 1) {
3730     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3731     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3732   } else {
3733     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3734     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3735   }
3736   PetscFunctionReturn(0);
3737 }
3738 
3739 #undef __FUNCT__
3740 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3741 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3742 {
3743   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3744 
3745   PetscFunctionBegin;
3746   if (Ad)     *Ad     = a->A;
3747   if (Ao)     *Ao     = a->B;
3748   if (colmap) *colmap = a->garray;
3749   PetscFunctionReturn(0);
3750 }
3751 
3752 #undef __FUNCT__
3753 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3754 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3755 {
3756   PetscErrorCode ierr;
3757   PetscInt       i;
3758   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3759 
3760   PetscFunctionBegin;
3761   if (coloring->ctype == IS_COLORING_GLOBAL) {
3762     ISColoringValue *allcolors,*colors;
3763     ISColoring      ocoloring;
3764 
3765     /* set coloring for diagonal portion */
3766     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3767 
3768     /* set coloring for off-diagonal portion */
3769     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3770     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3771     for (i=0; i<a->B->cmap->n; i++) {
3772       colors[i] = allcolors[a->garray[i]];
3773     }
3774     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3775     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3776     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3777     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3778   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3779     ISColoringValue *colors;
3780     PetscInt        *larray;
3781     ISColoring      ocoloring;
3782 
3783     /* set coloring for diagonal portion */
3784     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3785     for (i=0; i<a->A->cmap->n; i++) {
3786       larray[i] = i + A->cmap->rstart;
3787     }
3788     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3789     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3790     for (i=0; i<a->A->cmap->n; i++) {
3791       colors[i] = coloring->colors[larray[i]];
3792     }
3793     ierr = PetscFree(larray);CHKERRQ(ierr);
3794     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3795     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3796     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3797 
3798     /* set coloring for off-diagonal portion */
3799     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3800     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3801     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3802     for (i=0; i<a->B->cmap->n; i++) {
3803       colors[i] = coloring->colors[larray[i]];
3804     }
3805     ierr = PetscFree(larray);CHKERRQ(ierr);
3806     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3807     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3808     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3809   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3810   PetscFunctionReturn(0);
3811 }
3812 
3813 #undef __FUNCT__
3814 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3815 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3816 {
3817   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3818   PetscErrorCode ierr;
3819 
3820   PetscFunctionBegin;
3821   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3822   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3823   PetscFunctionReturn(0);
3824 }
3825 
3826 #undef __FUNCT__
3827 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3828 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3829 {
3830   PetscErrorCode ierr;
3831   PetscInt       m,N,i,rstart,nnz,Ii;
3832   PetscInt       *indx;
3833   PetscScalar    *values;
3834 
3835   PetscFunctionBegin;
3836   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3837   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3838     PetscInt       *dnz,*onz,sum,bs,cbs;
3839 
3840     if (n == PETSC_DECIDE) {
3841       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3842     }
3843     /* Check sum(n) = N */
3844     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3845     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
3846 
3847     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3848     rstart -= m;
3849 
3850     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3851     for (i=0; i<m; i++) {
3852       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3853       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3854       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3855     }
3856 
3857     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3858     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3859     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3860     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3861     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3862     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3863     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3864   }
3865 
3866   /* numeric phase */
3867   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3868   for (i=0; i<m; i++) {
3869     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3870     Ii   = i + rstart;
3871     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3872     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3873   }
3874   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3875   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3876   PetscFunctionReturn(0);
3877 }
3878 
3879 #undef __FUNCT__
3880 #define __FUNCT__ "MatFileSplit"
3881 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3882 {
3883   PetscErrorCode    ierr;
3884   PetscMPIInt       rank;
3885   PetscInt          m,N,i,rstart,nnz;
3886   size_t            len;
3887   const PetscInt    *indx;
3888   PetscViewer       out;
3889   char              *name;
3890   Mat               B;
3891   const PetscScalar *values;
3892 
3893   PetscFunctionBegin;
3894   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3895   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3896   /* Should this be the type of the diagonal block of A? */
3897   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3898   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3899   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3900   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3901   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3902   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3903   for (i=0; i<m; i++) {
3904     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3905     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3906     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3907   }
3908   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3909   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3910 
3911   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3912   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3913   ierr = PetscMalloc1(len+16,&name);CHKERRQ(ierr); /* extra room for "." and the rank digits */
3914   ierr = PetscSNPrintf(name,len+16,"%s.%d",outfile,rank);CHKERRQ(ierr);
3915   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3916   ierr = PetscFree(name);CHKERRQ(ierr);
3917   ierr = MatView(B,out);CHKERRQ(ierr);
3918   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3919   ierr = MatDestroy(&B);CHKERRQ(ierr);
3920   PetscFunctionReturn(0);
3921 }
3922 
3923 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3924 #undef __FUNCT__
3925 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3926 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3927 {
3928   PetscErrorCode      ierr;
3929   Mat_Merge_SeqsToMPI *merge;
3930   PetscContainer      container;
3931 
3932   PetscFunctionBegin;
3933   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3934   if (container) {
3935     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3936     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3937     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3938     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3939     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3940     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3941     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3942     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3943     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3944     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3945     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3946     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3947     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3948     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3949     ierr = PetscFree(merge);CHKERRQ(ierr);
3950     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3951   }
3952   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3953   PetscFunctionReturn(0);
3954 }
3955 
3956 #include <../src/mat/utils/freespace.h>
3957 #include <petscbt.h>
3958 
3959 #undef __FUNCT__
3960 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3961 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3962 {
3963   PetscErrorCode      ierr;
3964   MPI_Comm            comm;
3965   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3966   PetscMPIInt         size,rank,taga,*len_s;
3967   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3968   PetscInt            proc,m;
3969   PetscInt            **buf_ri,**buf_rj;
3970   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3971   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3972   MPI_Request         *s_waits,*r_waits;
3973   MPI_Status          *status;
3974   MatScalar           *aa=a->a;
3975   MatScalar           **abuf_r,*ba_i;
3976   Mat_Merge_SeqsToMPI *merge;
3977   PetscContainer      container;
3978 
3979   PetscFunctionBegin;
3980   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3981   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3982 
3983   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3984   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3985 
3986   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3987   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3988 
3989   bi     = merge->bi;
3990   bj     = merge->bj;
3991   buf_ri = merge->buf_ri;
3992   buf_rj = merge->buf_rj;
3993 
3994   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3995   owners = merge->rowmap->range;
3996   len_s  = merge->len_s;
3997 
3998   /* send and recv matrix values */
3999   /*-----------------------------*/
4000   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4001   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4002 
4003   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4004   for (proc=0,k=0; proc<size; proc++) {
4005     if (!len_s[proc]) continue;
4006     i    = owners[proc];
4007     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4008     k++;
4009   }
4010 
4011   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4012   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4013   ierr = PetscFree(status);CHKERRQ(ierr);
4014 
4015   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4016   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4017 
4018   /* insert mat values of mpimat */
4019   /*----------------------------*/
4020   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4021   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4022 
4023   for (k=0; k<merge->nrecv; k++) {
4024     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4025     nrows       = *(buf_ri_k[k]);
4026     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4027     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4028   }
4029 
4030   /* set values of ba */
4031   m = merge->rowmap->n;
4032   for (i=0; i<m; i++) {
4033     arow = owners[rank] + i;
4034     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4035     bnzi = bi[i+1] - bi[i];
4036     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4037 
4038     /* add local non-zero vals of this proc's seqmat into ba */
4039     anzi   = ai[arow+1] - ai[arow];
4040     aj     = a->j + ai[arow];
4041     aa     = a->a + ai[arow];
4042     nextaj = 0;
4043     for (j=0; nextaj<anzi; j++) {
4044       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4045         ba_i[j] += aa[nextaj++];
4046       }
4047     }
4048 
4049     /* add received vals into ba */
4050     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4051       /* i-th row */
4052       if (i == *nextrow[k]) {
4053         anzi   = *(nextai[k]+1) - *nextai[k];
4054         aj     = buf_rj[k] + *(nextai[k]);
4055         aa     = abuf_r[k] + *(nextai[k]);
4056         nextaj = 0;
4057         for (j=0; nextaj<anzi; j++) {
4058           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4059             ba_i[j] += aa[nextaj++];
4060           }
4061         }
4062         nextrow[k]++; nextai[k]++;
4063       }
4064     }
4065     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4066   }
4067   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4068   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4069 
4070   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4071   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4072   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4073   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4074   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4075   PetscFunctionReturn(0);
4076 }
4077 
4078 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4079 
4080 #undef __FUNCT__
4081 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4082 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4083 {
4084   PetscErrorCode      ierr;
4085   Mat                 B_mpi;
4086   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4087   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4088   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4089   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4090   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4091   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4092   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4093   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4094   MPI_Status          *status;
4095   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4096   PetscBT             lnkbt;
4097   Mat_Merge_SeqsToMPI *merge;
4098   PetscContainer      container;
4099 
4100   PetscFunctionBegin;
4101   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4102 
4103   /* make sure it is a PETSc comm */
4104   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4105   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4106   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4107 
4108   ierr = PetscNew(&merge);CHKERRQ(ierr);
4109   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4110 
4111   /* determine row ownership */
4112   /*---------------------------------------------------------*/
4113   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4114   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4115   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4116   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4117   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4118   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4119   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4120 
4121   m      = merge->rowmap->n;
4122   owners = merge->rowmap->range;
4123 
4124   /* determine the number of messages to send, their lengths */
4125   /*---------------------------------------------------------*/
4126   len_s = merge->len_s;
4127 
4128   len          = 0; /* length of buf_si[] */
4129   merge->nsend = 0;
4130   for (proc=0; proc<size; proc++) {
4131     len_si[proc] = 0;
4132     if (proc == rank) {
4133       len_s[proc] = 0;
4134     } else {
4135       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4136       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4137     }
4138     if (len_s[proc]) {
4139       merge->nsend++;
4140       nrows = 0;
4141       for (i=owners[proc]; i<owners[proc+1]; i++) {
4142         if (ai[i+1] > ai[i]) nrows++;
4143       }
4144       len_si[proc] = 2*(nrows+1);
4145       len         += len_si[proc];
4146     }
4147   }
4148 
4149   /* determine the number and length of messages to receive for ij-structure */
4150   /*-------------------------------------------------------------------------*/
4151   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4152   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4153 
4154   /* post the Irecv of j-structure */
4155   /*-------------------------------*/
4156   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4157   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4158 
4159   /* post the Isend of j-structure */
4160   /*--------------------------------*/
4161   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4162 
4163   for (proc=0, k=0; proc<size; proc++) {
4164     if (!len_s[proc]) continue;
4165     i    = owners[proc];
4166     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4167     k++;
4168   }
4169 
4170   /* receives and sends of j-structure are complete */
4171   /*------------------------------------------------*/
4172   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4173   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4174 
4175   /* send and recv i-structure */
4176   /*---------------------------*/
4177   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4178   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4179 
4180   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4181   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4182   for (proc=0,k=0; proc<size; proc++) {
4183     if (!len_s[proc]) continue;
4184     /* form outgoing message for i-structure:
4185          buf_si[0]:                 nrows to be sent
4186                [1:nrows]:           row index (local to the receiving process)
4187                [nrows+1:2*nrows+1]: i-structure index
4188     */
4189     /*-------------------------------------------*/
4190     nrows       = len_si[proc]/2 - 1;
4191     buf_si_i    = buf_si + nrows+1;
4192     buf_si[0]   = nrows;
4193     buf_si_i[0] = 0;
4194     nrows       = 0;
4195     for (i=owners[proc]; i<owners[proc+1]; i++) {
4196       anzi = ai[i+1] - ai[i];
4197       if (anzi) {
4198         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4199         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4200         nrows++;
4201       }
4202     }
4203     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4204     k++;
4205     buf_si += len_si[proc];
4206   }
4207 
4208   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4209   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4210 
4211   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4212   for (i=0; i<merge->nrecv; i++) {
4213     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4214   }
4215 
4216   ierr = PetscFree(len_si);CHKERRQ(ierr);
4217   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4218   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4219   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4220   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4221   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4222   ierr = PetscFree(status);CHKERRQ(ierr);
4223 
4224   /* compute a local seq matrix in each processor */
4225   /*----------------------------------------------*/
4226   /* allocate bi array and free space for accumulating nonzero column info */
4227   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4228   bi[0] = 0;
4229 
4230   /* create and initialize a linked list */
4231   nlnk = N+1;
4232   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4233 
4234   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4235   len  = ai[owners[rank+1]] - ai[owners[rank]];
4236   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4237 
4238   current_space = free_space;
4239 
4240   /* determine symbolic info for each local row */
4241   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4242 
4243   for (k=0; k<merge->nrecv; k++) {
4244     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4245     nrows       = *buf_ri_k[k];
4246     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4247     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4248   }
4249 
4250   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4251   len  = 0;
4252   for (i=0; i<m; i++) {
4253     bnzi = 0;
4254     /* add local non-zero cols of this proc's seqmat into lnk */
4255     arow  = owners[rank] + i;
4256     anzi  = ai[arow+1] - ai[arow];
4257     aj    = a->j + ai[arow];
4258     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4259     bnzi += nlnk;
4260     /* add received col data into lnk */
4261     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4262       if (i == *nextrow[k]) { /* i-th row */
4263         anzi  = *(nextai[k]+1) - *nextai[k];
4264         aj    = buf_rj[k] + *nextai[k];
4265         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4266         bnzi += nlnk;
4267         nextrow[k]++; nextai[k]++;
4268       }
4269     }
4270     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4271 
4272     /* if free space is not available, make more free space */
4273     if (current_space->local_remaining<bnzi) {
4274       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4275       nspacedouble++;
4276     }
4277     /* copy data into free space, then initialize lnk */
4278     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4279     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4280 
4281     current_space->array           += bnzi;
4282     current_space->local_used      += bnzi;
4283     current_space->local_remaining -= bnzi;
4284 
4285     bi[i+1] = bi[i] + bnzi;
4286   }
4287 
4288   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4289 
4290   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4291   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4292   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4293 
4294   /* create symbolic parallel matrix B_mpi */
4295   /*---------------------------------------*/
4296   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4297   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4298   if (n==PETSC_DECIDE) {
4299     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4300   } else {
4301     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4302   }
4303   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4304   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4305   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4306   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4307   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4308 
4309   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4310   B_mpi->assembled    = PETSC_FALSE;
4311   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4312   merge->bi           = bi;
4313   merge->bj           = bj;
4314   merge->buf_ri       = buf_ri;
4315   merge->buf_rj       = buf_rj;
4316   merge->coi          = NULL;
4317   merge->coj          = NULL;
4318   merge->owners_co    = NULL;
4319 
4320   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4321 
4322   /* attach the supporting struct to B_mpi for reuse */
4323   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4324   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4325   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4326   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4327   *mpimat = B_mpi;
4328 
4329   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4330   PetscFunctionReturn(0);
4331 }
4332 
4333 #undef __FUNCT__
4334 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4335 /*@C
4336       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4337                  matrices from each process
4338 
4339     Collective on MPI_Comm
4340 
4341    Input Parameters:
4342 +    comm - the communicator the parallel matrix will live on
4343 .    seqmat - the input sequential matrix (one per process)
4344 .    m - number of local rows (or PETSC_DECIDE)
4345 .    n - number of local columns (or PETSC_DECIDE)
4346 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4347 
4348    Output Parameter:
4349 .    mpimat - the parallel matrix generated
4350 
4351     Level: advanced
4352 
4353    Notes:
4354      The dimensions of the sequential matrix on each process MUST be the same.
4355      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4356      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
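
     A minimal calling sketch (error handling abbreviated; each process is assumed
     to have assembled its own seqmat with identical global dimensions):
.vb
     Mat seqmat,mpimat;
     ... assemble seqmat (MATSEQAIJ) on every process ...
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ... change the numerical values of seqmat, keeping the nonzero pattern ...
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve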
4357 @*/
4358 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4359 {
4360   PetscErrorCode ierr;
4361   PetscMPIInt    size;
4362 
4363   PetscFunctionBegin;
4364   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4365   if (size == 1) {
4366     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4367     if (scall == MAT_INITIAL_MATRIX) {
4368       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4369     } else {
4370       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4371     }
4372     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4373     PetscFunctionReturn(0);
4374   }
4375   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4376   if (scall == MAT_INITIAL_MATRIX) {
4377     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4378   }
4379   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4380   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4381   PetscFunctionReturn(0);
4382 }
4383 
4384 #undef __FUNCT__
4385 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4386 /*@
4387      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4388           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4389           with MatGetSize()
4390 
4391     Not Collective
4392 
4393    Input Parameters:
4394 +    A - the matrix
4395 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4396 
4397    Output Parameter:
4398 .    A_loc - the local sequential matrix generated
4399 
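    A minimal usage sketch (error checking abbreviated):
.vb
    Mat A_loc;
    ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
    ... use A_loc as an ordinary MATSEQAIJ matrix ...
    ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
    ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
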
4400     Level: developer
4401 
4402 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4403 
4404 @*/
4405 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4406 {
4407   PetscErrorCode ierr;
4408   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4409   Mat_SeqAIJ     *mat,*a,*b;
4410   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4411   MatScalar      *aa,*ba,*cam;
4412   PetscScalar    *ca;
4413   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4414   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4415   PetscBool      match;
4416   MPI_Comm       comm;
4417   PetscMPIInt    size;
4418 
4419   PetscFunctionBegin;
4420   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4421   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4422   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4423   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4424   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4425 
4426   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4427   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4428   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4429   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4430   aa = a->a; ba = b->a;
4431   if (scall == MAT_INITIAL_MATRIX) {
4432     if (size == 1) {
4433       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4434       PetscFunctionReturn(0);
4435     }
4436 
4437     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4438     ci[0] = 0;
4439     for (i=0; i<am; i++) {
4440       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4441     }
4442     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4443     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4444     k    = 0;
4445     for (i=0; i<am; i++) {
4446       ncols_o = bi[i+1] - bi[i];
4447       ncols_d = ai[i+1] - ai[i];
4448       /* off-diagonal portion of A: columns left of the diagonal block */
4449       for (jo=0; jo<ncols_o; jo++) {
4450         col = cmap[*bj];
4451         if (col >= cstart) break;
4452         cj[k]   = col; bj++;
4453         ca[k++] = *ba++;
4454       }
4455       /* diagonal portion of A */
4456       for (j=0; j<ncols_d; j++) {
4457         cj[k]   = cstart + *aj++;
4458         ca[k++] = *aa++;
4459       }
4460       /* off-diagonal portion of A: columns right of the diagonal block */
4461       for (j=jo; j<ncols_o; j++) {
4462         cj[k]   = cmap[*bj++];
4463         ca[k++] = *ba++;
4464       }
4465     }
4466     /* put together the new matrix */
4467     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4468     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4469     /* Since these are PETSc arrays, change flags to free them as necessary. */
4470     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4471     mat->free_a  = PETSC_TRUE;
4472     mat->free_ij = PETSC_TRUE;
4473     mat->nonew   = 0;
4474   } else if (scall == MAT_REUSE_MATRIX) {
4475     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4476     ci = mat->i; cj = mat->j; cam = mat->a;
4477     for (i=0; i<am; i++) {
4478       /* off-diagonal portion of A: columns left of the diagonal block */
4479       ncols_o = bi[i+1] - bi[i];
4480       for (jo=0; jo<ncols_o; jo++) {
4481         col = cmap[*bj];
4482         if (col >= cstart) break;
4483         *cam++ = *ba++; bj++;
4484       }
4485       /* diagonal portion of A */
4486       ncols_d = ai[i+1] - ai[i];
4487       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4488       /* off-diagonal portion of A: columns right of the diagonal block */
4489       for (j=jo; j<ncols_o; j++) {
4490         *cam++ = *ba++; bj++;
4491       }
4492     }
4493   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4494   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4495   PetscFunctionReturn(0);
4496 }
4497 
4498 #undef __FUNCT__
4499 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4500 /*@C
4501      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4502 
4503     Not Collective
4504 
4505    Input Parameters:
4506 +    A - the matrix
4507 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4508 -    row, col - index sets of rows and columns to extract (or NULL)
4509 
4510    Output Parameter:
4511 .    A_loc - the local sequential matrix generated
4512 
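    A minimal usage sketch (passing NULL for row and col selects all local rows
    and the nonzero columns):
.vb
    Mat A_loc;
    ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
    ... use A_loc ...
    ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
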
4513     Level: developer
4514 
4515 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4516 
4517 @*/
4518 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4519 {
4520   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4521   PetscErrorCode ierr;
4522   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4523   IS             isrowa,iscola;
4524   Mat            *aloc;
4525   PetscBool      match;
4526 
4527   PetscFunctionBegin;
4528   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4529   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4530   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4531   if (!row) {
4532     start = A->rmap->rstart; end = A->rmap->rend;
4533     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4534   } else {
4535     isrowa = *row;
4536   }
4537   if (!col) {
4538     start = A->cmap->rstart;
4539     cmap  = a->garray;
4540     nzA   = a->A->cmap->n;
4541     nzB   = a->B->cmap->n;
4542     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4543     ncols = 0;
4544     for (i=0; i<nzB; i++) {
4545       if (cmap[i] < start) idx[ncols++] = cmap[i];
4546       else break;
4547     }
4548     imark = i;
4549     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4550     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4551     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4552   } else {
4553     iscola = *col;
4554   }
4555   if (scall != MAT_INITIAL_MATRIX) {
4556     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4557     aloc[0] = *A_loc;
4558   }
4559   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4560   *A_loc = aloc[0];
4561   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4562   if (!row) {
4563     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4564   }
4565   if (!col) {
4566     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4567   }
4568   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4569   PetscFunctionReturn(0);
4570 }
4571 
4572 #undef __FUNCT__
4573 #define __FUNCT__ "MatGetBrowsOfAcols"
4574 /*@C
4575     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4576 
4577     Collective on Mat
4578 
4579    Input Parameters:
4580 +    A,B - the matrices in mpiaij format
4581 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4582 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4583 
4584    Output Parameters:
4585 +    rowb, colb - index sets of rows and columns of B to extract
4586 -    B_seq - the sequential matrix generated
4587 
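   A minimal usage sketch; the index sets created by the first call are passed
   back in to refresh B_seq after the values of B change:
.vb
   IS  rowb,colb;
   Mat B_seq;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   ... values of B change, same nonzero pattern ...
   ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   ierr = ISDestroy(&rowb);CHKERRQ(ierr);
   ierr = ISDestroy(&colb);CHKERRQ(ierr);
   ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
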
4588     Level: developer
4589 
4590 @*/
4591 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4592 {
4593   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4594   PetscErrorCode ierr;
4595   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4596   IS             isrowb,iscolb;
4597   Mat            *bseq=NULL;
4598 
4599   PetscFunctionBegin;
4600   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4601     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4602   }
4603   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4604 
4605   if (scall == MAT_INITIAL_MATRIX) {
4606     start = A->cmap->rstart;
4607     cmap  = a->garray;
4608     nzA   = a->A->cmap->n;
4609     nzB   = a->B->cmap->n;
4610     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4611     ncols = 0;
4612     for (i=0; i<nzB; i++) {  /* off-diagonal cols below the owned column range */
4613       if (cmap[i] < start) idx[ncols++] = cmap[i];
4614       else break;
4615     }
4616     imark = i;
4617     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4618     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* off-diagonal cols above the owned column range */
4619     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4620     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4621   } else {
4622     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4623     isrowb  = *rowb; iscolb = *colb;
4624     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4625     bseq[0] = *B_seq;
4626   }
4627   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4628   *B_seq = bseq[0];
4629   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4630   if (!rowb) {
4631     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4632   } else {
4633     *rowb = isrowb;
4634   }
4635   if (!colb) {
4636     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4637   } else {
4638     *colb = iscolb;
4639   }
4640   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4641   PetscFunctionReturn(0);
4642 }
4643 
4644 #undef __FUNCT__
4645 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4646 /*
4647     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4648     of the OFF-DIAGONAL portion of local A
4649 
4650     Collective on Mat
4651 
4652    Input Parameters:
4653 +    A,B - the matrices in mpiaij format
4654 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4655 
4656    Output Parameters:
4657 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4658 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4659 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4660 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4661 
4662     Level: developer
4663 
4664 */
4665 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4666 {
4667   VecScatter_MPI_General *gen_to,*gen_from;
4668   PetscErrorCode         ierr;
4669   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4670   Mat_SeqAIJ             *b_oth;
4671   VecScatter             ctx =a->Mvctx;
4672   MPI_Comm               comm;
4673   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4674   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4675   PetscScalar            *rvalues,*svalues;
4676   MatScalar              *b_otha,*bufa,*bufA;
4677   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4678   MPI_Request            *rwaits = NULL,*swaits = NULL;
4679   MPI_Status             *sstatus,rstatus;
4680   PetscMPIInt            jj,size;
4681   PetscInt               *cols,sbs,rbs;
4682   PetscScalar            *vals;
4683 
4684   PetscFunctionBegin;
4685   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4686   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4687   if (size == 1) PetscFunctionReturn(0);
4688 
4689   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4690     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4691   }
4692   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4693   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4694 
4695   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4696   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4697   rvalues  = gen_from->values; /* reused to hold the lengths of the incoming rows */
4698   svalues  = gen_to->values;   /* reused to hold the lengths of the outgoing rows */
4699   nrecvs   = gen_from->n;
4700   nsends   = gen_to->n;
4701 
4702   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4703   srow    = gen_to->indices;    /* local row index to be sent */
4704   sstarts = gen_to->starts;
4705   sprocs  = gen_to->procs;
4706   sstatus = gen_to->sstatus;
4707   sbs     = gen_to->bs;
4708   rstarts = gen_from->starts;
4709   rprocs  = gen_from->procs;
4710   rbs     = gen_from->bs;
4711 
4712   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4713   if (scall == MAT_INITIAL_MATRIX) {
4714     /* i-array */
4715     /*---------*/
4716     /*  post receives */
4717     for (i=0; i<nrecvs; i++) {
4718       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4719       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4720       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4721     }
4722 
4723     /* pack the outgoing message */
4724     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4725 
4726     sstartsj[0] = 0;
4727     rstartsj[0] = 0;
4728     len         = 0; /* total length of j or a array to be sent */
4729     k           = 0;
4730     for (i=0; i<nsends; i++) {
4731       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4732       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4733       for (j=0; j<nrows; j++) {
4734         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4735         for (l=0; l<sbs; l++) {
4736           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4737 
4738           rowlen[j*sbs+l] = ncols;
4739 
4740           len += ncols;
4741           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4742         }
4743         k++;
4744       }
4745       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4746 
4747       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4748     }
4749     /* recvs and sends of i-array are completed */
4750     i = nrecvs;
4751     while (i--) {
4752       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4753     }
4754     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4755 
4756     /* allocate buffers for sending j and a arrays */
4757     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4758     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4759 
4760     /* create i-array of B_oth */
4761     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4762 
4763     b_othi[0] = 0;
4764     len       = 0; /* total length of j or a array to be received */
4765     k         = 0;
4766     for (i=0; i<nrecvs; i++) {
4767       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4768       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4769       for (j=0; j<nrows; j++) {
4770         b_othi[k+1] = b_othi[k] + rowlen[j];
4771         len        += rowlen[j]; k++;
4772       }
4773       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4774     }
4775 
4776     /* allocate space for j and a arrays of B_oth */
4777     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4778     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4779 
4780     /* j-array */
4781     /*---------*/
4782     /*  post receives of j-array */
4783     for (i=0; i<nrecvs; i++) {
4784       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4785       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4786     }
4787 
4788     /* pack the outgoing message j-array */
4789     k = 0;
4790     for (i=0; i<nsends; i++) {
4791       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4792       bufJ  = bufj+sstartsj[i];
4793       for (j=0; j<nrows; j++) {
4794         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4795         for (ll=0; ll<sbs; ll++) {
4796           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4797           for (l=0; l<ncols; l++) {
4798             *bufJ++ = cols[l];
4799           }
4800           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4801         }
4802       }
4803       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4804     }
4805 
4806     /* recvs and sends of j-array are completed */
4807     i = nrecvs;
4808     while (i--) {
4809       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4810     }
4811     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4812   } else if (scall == MAT_REUSE_MATRIX) {
4813     sstartsj = *startsj_s;
4814     rstartsj = *startsj_r;
4815     bufa     = *bufa_ptr;
4816     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4817     b_otha   = b_oth->a;
4818   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4819 
4820   /* a-array */
4821   /*---------*/
4822   /*  post receives of a-array */
4823   for (i=0; i<nrecvs; i++) {
4824     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4825     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4826   }
4827 
4828   /* pack the outgoing message a-array */
4829   k = 0;
4830   for (i=0; i<nsends; i++) {
4831     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4832     bufA  = bufa+sstartsj[i];
4833     for (j=0; j<nrows; j++) {
4834       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4835       for (ll=0; ll<sbs; ll++) {
4836         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4837         for (l=0; l<ncols; l++) {
4838           *bufA++ = vals[l];
4839         }
4840         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4841       }
4842     }
4843     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4844   }
4845   /* recvs and sends of a-array are completed */
4846   i = nrecvs;
4847   while (i--) {
4848     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4849   }
4850   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4851   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4852 
4853   if (scall == MAT_INITIAL_MATRIX) {
4854     /* put together the new matrix */
4855     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4856 
4857     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4858     /* Since these are PETSc arrays, change flags to free them as necessary. */
4859     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4860     b_oth->free_a  = PETSC_TRUE;
4861     b_oth->free_ij = PETSC_TRUE;
4862     b_oth->nonew   = 0;
4863 
4864     ierr = PetscFree(bufj);CHKERRQ(ierr);
4865     if (!startsj_s || !bufa_ptr) {
4866       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4867       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4868     } else {
4869       *startsj_s = sstartsj;
4870       *startsj_r = rstartsj;
4871       *bufa_ptr  = bufa;
4872     }
4873   }
4874   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4875   PetscFunctionReturn(0);
4876 }
4877 
4878 #undef __FUNCT__
4879 #define __FUNCT__ "MatGetCommunicationStructs"
4880 /*@C
4881   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4882 
4883   Not Collective
4884 
4885   Input Parameter:
4886 . A - The matrix in mpiaij format
4887 
4888   Output Parameter:
4889 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4890 . colmap - A map from global column index to local index into lvec
4891 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4892 
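  A minimal usage sketch; note that when PETSc is configured with ctable support
  (PETSC_USE_CTABLE) the colmap argument is a PetscTable* instead of a PetscInt**:
.vb
  Vec        lvec;
  PetscInt   *colmap;
  VecScatter Mvctx;
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
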
4893   Level: developer
4894 
4895 @*/
4896 #if defined(PETSC_USE_CTABLE)
4897 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4898 #else
4899 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4900 #endif
4901 {
4902   Mat_MPIAIJ *a;
4903 
4904   PetscFunctionBegin;
4905   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4906   PetscValidPointer(lvec, 2);
4907   PetscValidPointer(colmap, 3);
4908   PetscValidPointer(multScatter, 4);
4909   a = (Mat_MPIAIJ*) A->data;
4910   if (lvec) *lvec = a->lvec;
4911   if (colmap) *colmap = a->colmap;
4912   if (multScatter) *multScatter = a->Mvctx;
4913   PetscFunctionReturn(0);
4914 }
4915 
4916 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4917 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4918 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4919 #if defined(PETSC_HAVE_ELEMENTAL)
4920 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4921 #endif
4922 
4923 #undef __FUNCT__
4924 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4925 /*
4926     Computes (B'*A')' since computing A*B directly is untenable
4927 
4928                n                       p                          p
4929         (              )       (              )         (                  )
4930       m (      A       )  *  n (       B      )   =   m (         C        )
4931         (              )       (              )         (                  )
4932 
4933 */
4934 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4935 {
4936   PetscErrorCode ierr;
4937   Mat            At,Bt,Ct;
4938 
4939   PetscFunctionBegin;
4940   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4941   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4942   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4943   ierr = MatDestroy(&At);CHKERRQ(ierr);
4944   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4945   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4946   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4947   PetscFunctionReturn(0);
4948 }
4949 
4950 #undef __FUNCT__
4951 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4952 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4953 {
4954   PetscErrorCode ierr;
4955   PetscInt       m=A->rmap->n,n=B->cmap->n;
4956   Mat            Cmat;
4957 
4958   PetscFunctionBegin;
4959   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d",A->cmap->n,B->rmap->n);
4960   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4961   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4962   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4963   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4964   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4965   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4966   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4967 
4968   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4969 
4970   *C = Cmat;
4971   PetscFunctionReturn(0);
4972 }
4973 
4974 /* ----------------------------------------------------------------*/
4975 #undef __FUNCT__
4976 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4977 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4978 {
4979   PetscErrorCode ierr;
4980 
4981   PetscFunctionBegin;
4982   if (scall == MAT_INITIAL_MATRIX) {
4983     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4984     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4985     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4986   }
4987   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4988   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4989   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4990   PetscFunctionReturn(0);
4991 }
4992 
4993 /*MC
4994    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4995 
4996    Options Database Keys:
4997 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4998 
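  A minimal sketch of selecting this type in code (alternatively, call
  MatSetFromOptions() and pass -mat_type mpiaij at runtime):
.vb
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
.ve
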
4999   Level: beginner
5000 
5001 .seealso: MatCreateAIJ()
5002 M*/
5003 
5004 #undef __FUNCT__
5005 #define __FUNCT__ "MatCreate_MPIAIJ"
5006 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5007 {
5008   Mat_MPIAIJ     *b;
5009   PetscErrorCode ierr;
5010   PetscMPIInt    size;
5011 
5012   PetscFunctionBegin;
5013   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5014 
5015   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5016   B->data       = (void*)b;
5017   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5018   B->assembled  = PETSC_FALSE;
5019   B->insertmode = NOT_SET_VALUES;
5020   b->size       = size;
5021 
5022   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5023 
5024   /* build cache for off array entries formed */
5025   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5026 
5027   b->donotstash  = PETSC_FALSE;
5028   b->colmap      = 0;
5029   b->garray      = 0;
5030   b->roworiented = PETSC_TRUE;
5031 
5032   /* stuff used for matrix vector multiply */
5033   b->lvec  = NULL;
5034   b->Mvctx = NULL;
5035 
5036   /* stuff for MatGetRow() */
5037   b->rowindices   = 0;
5038   b->rowvalues    = 0;
5039   b->getrowactive = PETSC_FALSE;
5040 
5041   /* flexible pointer used in CUSP/CUSPARSE classes */
5042   b->spptr = NULL;
5043 
5044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5054 #if defined(PETSC_HAVE_ELEMENTAL)
5055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5056 #endif
5057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5060   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5061   PetscFunctionReturn(0);
5062 }
5063 
5064 #undef __FUNCT__
5065 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5066 /*@C
5067      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5068          and "off-diagonal" part of the matrix in CSR format.
5069 
5070    Collective on MPI_Comm
5071 
5072    Input Parameters:
5073 +  comm - MPI communicator
5074 .  m - number of local rows (Cannot be PETSC_DECIDE)
5075 .  n - This value should be the same as the local size used in creating the
5076        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5077        it calculated if N is given). For square matrices n is almost always m.
5078 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5079 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5080 .   i - row indices for "diagonal" portion of matrix
5081 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
5082 .   a - matrix values
5083 .   oi - row indices for "off-diagonal" portion of matrix
5084 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
5085 -   oa - matrix values
5086 
5087    Output Parameter:
5088 .   mat - the matrix
5089 
5090    Level: advanced
5091 
5092    Notes:
5093        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5094        must free the arrays once the matrix has been destroyed and not before.
5095 
5096        The i and j indices are 0 based
5097 
5098        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5099 
5100        This sets local rows and cannot be used to set off-processor values.
5101 
5102        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5103        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5104        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5105        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5106        keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5107        communication if it is known that only local entries will be set.
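       If this routine is used anyway, the minimal sketch below (sizes and values are
       purely illustrative) shows how exactly two processes could assemble the global
       matrix [2 -1; -1 2], each owning one row and one column; remember that all six
       arrays must remain valid until the matrix is destroyed:

.vb
       Mat            A;
       PetscErrorCode ierr;
       PetscMPIInt    rank;
       PetscInt       i[2]  = {0,1};   /* one nonzero in the single local row of the diagonal block     */
       PetscInt       j[1]  = {0};     /* LOCAL column index of that nonzero                            */
       PetscScalar    a[1]  = {2.0};
       PetscInt       oi[2] = {0,1};   /* one nonzero in the single local row of the off-diagonal block */
       PetscInt       oj[1];           /* GLOBAL column index of the coupling entry                     */
       PetscScalar    oa[1] = {-1.0};

       MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
       oj[0] = rank ? 0 : 1;           /* couple to the column owned by the other process */
       ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve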
5108 
5109 .keywords: matrix, aij, compressed row, sparse, parallel
5110 
5111 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5112           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5113 @*/
5114 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5115 {
5116   PetscErrorCode ierr;
5117   Mat_MPIAIJ     *maij;
5118 
5119   PetscFunctionBegin;
5120   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5121   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5122   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5123   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5124   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5125   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5126   maij = (Mat_MPIAIJ*) (*mat)->data;
5127 
5128   (*mat)->preallocated = PETSC_TRUE;
5129 
5130   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5131   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5132 
5133   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5134   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5135 
5136   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5137   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5138   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5139   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5140 
5141   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5142   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5143   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5144   PetscFunctionReturn(0);
5145 }
5146 
5147 /*
5148     Special version of MatSetValues() for MATMPIAIJ, for direct calls from Fortran that bypass the standard Fortran interface stubs
5149 */
5150 #include <petsc-private/fortranimpl.h>
5151 
5152 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5153 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5154 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5155 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5156 #endif
5157 
5158 /* Redefine these macros so they can be used in a void function: on error they abort instead of returning an error code */
5159 #undef CHKERRQ
5160 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5161 #undef SETERRQ2
5162 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5163 #undef SETERRQ3
5164 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5165 #undef SETERRQ
5166 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5167 
5168 #undef __FUNCT__
5169 #define __FUNCT__ "matsetvaluesmpiaij_"
5170 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5171 {
5172   Mat            mat  = *mmat;
5173   PetscInt       m    = *mm, n = *mn;
5174   InsertMode     addv = *maddv;
5175   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5176   PetscScalar    value;
5177   PetscErrorCode ierr;
5178 
5179   MatCheckPreallocated(mat,1);
5180   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5181 
5182 #if defined(PETSC_USE_DEBUG)
5183   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5184 #endif
5185   {
5186     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5187     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5188     PetscBool roworiented = aij->roworiented;
5189 
5190     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5191     Mat        A                 = aij->A;
5192     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5193     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5194     MatScalar  *aa               = a->a;
5195     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5196     Mat        B                 = aij->B;
5197     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5198     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5199     MatScalar  *ba               = b->a;
5200 
5201     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5202     PetscInt  nonew = a->nonew;
5203     MatScalar *ap1,*ap2;
5204 
5205     PetscFunctionBegin;
5206     for (i=0; i<m; i++) {
5207       if (im[i] < 0) continue;
5208 #if defined(PETSC_USE_DEBUG)
5209       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5210 #endif
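      /* this row is owned by this process: insert directly into the local diagonal (A) and off-diagonal (B) blocks */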
5211       if (im[i] >= rstart && im[i] < rend) {
5212         row      = im[i] - rstart;
5213         lastcol1 = -1;
5214         rp1      = aj + ai[row];
5215         ap1      = aa + ai[row];
5216         rmax1    = aimax[row];
5217         nrow1    = ailen[row];
5218         low1     = 0;
5219         high1    = nrow1;
5220         lastcol2 = -1;
5221         rp2      = bj + bi[row];
5222         ap2      = ba + bi[row];
5223         rmax2    = bimax[row];
5224         nrow2    = bilen[row];
5225         low2     = 0;
5226         high2    = nrow2;
5227 
5228         for (j=0; j<n; j++) {
5229           if (roworiented) value = v[i*n+j];
5230           else value = v[i+j*m];
5231           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
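          /* the column lies in the diagonal block: convert the global column index to a local one */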
5232           if (in[j] >= cstart && in[j] < cend) {
5233             col = in[j] - cstart;
5234             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5235           } else if (in[j] < 0) continue;
5236 #if defined(PETSC_USE_DEBUG)
5237           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5238 #endif
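          /* the column lies in the off-diagonal block B: before the first assembly B is indexed by global columns, afterwards aij->colmap maps a global column to its slot in B */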
5239           else {
5240             if (mat->was_assembled) {
5241               if (!aij->colmap) {
5242                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5243               }
5244 #if defined(PETSC_USE_CTABLE)
5245               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5246               col--;
5247 #else
5248               col = aij->colmap[in[j]] - 1;
5249 #endif
5250               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5251                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5252                 col  =  in[j];
5253                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5254                 B     = aij->B;
5255                 b     = (Mat_SeqAIJ*)B->data;
5256                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5257                 rp2   = bj + bi[row];
5258                 ap2   = ba + bi[row];
5259                 rmax2 = bimax[row];
5260                 nrow2 = bilen[row];
5261                 low2  = 0;
5262                 high2 = nrow2;
5263                 bm    = aij->B->rmap->n;
5264                 ba    = b->a;
5265               }
5266             } else col = in[j];
5267             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5268           }
5269         }
5270       } else if (!aij->donotstash) {
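        /* this row is owned by another process: stash the values; they are communicated during matrix assembly */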
5271         if (roworiented) {
5272           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5273         } else {
5274           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5275         }
5276       }
5277     }
5278   }
5279   PetscFunctionReturnVoid();
5280 }
5281 
5282