xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fee518eb64eefbf9bda05b76e476d2dcf413f412)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 
6 /*MC
7    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
8 
9    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
10    and MATMPIAIJ otherwise.  As a result, for single process communicators,
11   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
12   for communicators controlling multiple processes.  It is recommended that you call both of
13   the above preallocation routines for simplicity.
14 
15    Options Database Keys:
16 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
17 
18   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
19    enough exist.
20 
21   Level: beginner
22 
23 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
24 M*/
25 
26 /*MC
27    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
28 
29    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
30    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
31    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
32   for communicators controlling multiple processes.  It is recommended that you call both of
33   the above preallocation routines for simplicity.
34 
35    Options Database Keys:
36 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
37 
38   Level: beginner
39 
40 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
41 M*/
42 
43 #undef __FUNCT__
44 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
45 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
46 {
47   PetscErrorCode  ierr;
48   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
49   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
50   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
51   const PetscInt  *ia,*ib;
52   const MatScalar *aa,*bb;
53   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
54   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
55 
56   PetscFunctionBegin;
57   *keptrows = 0;
58   ia        = a->i;
59   ib        = b->i;
60   for (i=0; i<m; i++) {
61     na = ia[i+1] - ia[i];
62     nb = ib[i+1] - ib[i];
63     if (!na && !nb) {
64       cnt++;
65       goto ok1;
66     }
67     aa = a->a + ia[i];
68     for (j=0; j<na; j++) {
69       if (aa[j] != 0.0) goto ok1;
70     }
71     bb = b->a + ib[i];
72     for (j=0; j <nb; j++) {
73       if (bb[j] != 0.0) goto ok1;
74     }
75     cnt++;
76 ok1:;
77   }
78   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
79   if (!n0rows) PetscFunctionReturn(0);
80   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
81   cnt  = 0;
82   for (i=0; i<m; i++) {
83     na = ia[i+1] - ia[i];
84     nb = ib[i+1] - ib[i];
85     if (!na && !nb) continue;
86     aa = a->a + ia[i];
87     for (j=0; j<na;j++) {
88       if (aa[j] != 0.0) {
89         rows[cnt++] = rstart + i;
90         goto ok2;
91       }
92     }
93     bb = b->a + ib[i];
94     for (j=0; j<nb; j++) {
95       if (bb[j] != 0.0) {
96         rows[cnt++] = rstart + i;
97         goto ok2;
98       }
99     }
100 ok2:;
101   }
102   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
108 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
109 {
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
111   PetscErrorCode ierr;
112   PetscInt       i,rstart,nrows,*rows;
113 
114   PetscFunctionBegin;
115   *zrows = NULL;
116   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
117   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
118   for (i=0; i<nrows; i++) rows[i] += rstart;
119   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
120   PetscFunctionReturn(0);
121 }
122 
123 #undef __FUNCT__
124 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
125 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
126 {
127   PetscErrorCode ierr;
128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
129   PetscInt       i,n,*garray = aij->garray;
130   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
131   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
132   PetscReal      *work;
133 
134   PetscFunctionBegin;
135   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
136   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
137   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 */
181 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
182 {
183   PetscMPIInt    rank,size;
184   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
185   PetscErrorCode ierr;
186   Mat            mat;
187   Mat_SeqAIJ     *gmata;
188   PetscMPIInt    tag;
189   MPI_Status     status;
190   PetscBool      aij;
191   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
192 
193   PetscFunctionBegin;
194   CHKMEMQ;
195   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
196   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
197   if (!rank) {
198     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
199     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
200   }
201   if (reuse == MAT_INITIAL_MATRIX) {
202     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
203     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
204     if (!rank) {
205       bses[0] = gmat->rmap->bs;
206       bses[1] = gmat->cmap->bs;
207     }
208     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
209     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
210     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
211     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
212     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
213     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
214 
215     rowners[0] = 0;
216     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine number diagonal and off-diagonal counts */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
231       jj   = 0;
232       for (i=0; i<m; i++) {
233         for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236           jj++;
237         }
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine number diagonal and off-diagonal counts */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj   = 0;
266       for (i=0; i<m; i++) {
267         for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270           jj++;
271         }
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298 
299     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
300 
301     *inmat = mat;
302   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
303     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
304     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
305     mat  = *inmat;
306     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
307     if (!rank) {
308       /* send numerical values to other processes */
309       gmata  = (Mat_SeqAIJ*) gmat->data;
310       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
311       gmataa = gmata->a;
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
315       }
316       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
317     } else {
318       /* receive numerical values from process 0*/
319       nz   = Ad->nz + Ao->nz;
320       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
321       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
322     }
323     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
324     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
325     ad = Ad->a;
326     ao = Ao->a;
327     if (mat->rmap->n) {
328       i  = 0;
329       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     for (i=1; i<mat->rmap->n; i++) {
333       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
334       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
335     }
336     i--;
337     if (mat->rmap->n) {
338       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
339     }
340     if (rank) {
341       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
342     }
343   }
344   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
345   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
346   CHKMEMQ;
347   PetscFunctionReturn(0);
348 }
349 
350 /*
351   Local utility routine that creates a mapping from the global column
352 number to the local number in the off-diagonal part of the local
353 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
354 a slightly higher hash table cost; without it it is not scalable (each processor
355 has an order N integer array but is fast to acess.
356 */
357 #undef __FUNCT__
358 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
359 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
360 {
361   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
362   PetscErrorCode ierr;
363   PetscInt       n = aij->B->cmap->n,i;
364 
365   PetscFunctionBegin;
366   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
367 #if defined(PETSC_USE_CTABLE)
368   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
369   for (i=0; i<n; i++) {
370     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
371   }
372 #else
373   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
374   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
375   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
377 #endif
378   PetscFunctionReturn(0);
379 }
380 
381 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
382 { \
383     if (col <= lastcol1)  low1 = 0;     \
384     else                 high1 = nrow1; \
385     lastcol1 = col;\
386     while (high1-low1 > 5) { \
387       t = (low1+high1)/2; \
388       if (rp1[t] > col) high1 = t; \
389       else              low1  = t; \
390     } \
391       for (_i=low1; _i<high1; _i++) { \
392         if (rp1[_i] > col) break; \
393         if (rp1[_i] == col) { \
394           if (addv == ADD_VALUES) ap1[_i] += value;   \
395           else                    ap1[_i] = value; \
396           goto a_noinsert; \
397         } \
398       }  \
399       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
400       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
401       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
402       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
403       N = nrow1++ - 1; a->nz++; high1++; \
404       /* shift up all the later entries in this row */ \
405       for (ii=N; ii>=_i; ii--) { \
406         rp1[ii+1] = rp1[ii]; \
407         ap1[ii+1] = ap1[ii]; \
408       } \
409       rp1[_i] = col;  \
410       ap1[_i] = value;  \
411       a_noinsert: ; \
412       ailen[row] = nrow1; \
413 }
414 
415 
416 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
417   { \
418     if (col <= lastcol2) low2 = 0;                        \
419     else high2 = nrow2;                                   \
420     lastcol2 = col;                                       \
421     while (high2-low2 > 5) {                              \
422       t = (low2+high2)/2;                                 \
423       if (rp2[t] > col) high2 = t;                        \
424       else             low2  = t;                         \
425     }                                                     \
426     for (_i=low2; _i<high2; _i++) {                       \
427       if (rp2[_i] > col) break;                           \
428       if (rp2[_i] == col) {                               \
429         if (addv == ADD_VALUES) ap2[_i] += value;         \
430         else                    ap2[_i] = value;          \
431         goto b_noinsert;                                  \
432       }                                                   \
433     }                                                     \
434     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
435     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
436     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
437     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
438     N = nrow2++ - 1; b->nz++; high2++;                    \
439     /* shift up all the later entries in this row */      \
440     for (ii=N; ii>=_i; ii--) {                            \
441       rp2[ii+1] = rp2[ii];                                \
442       ap2[ii+1] = ap2[ii];                                \
443     }                                                     \
444     rp2[_i] = col;                                        \
445     ap2[_i] = value;                                      \
446     b_noinsert: ;                                         \
447     bilen[row] = nrow2;                                   \
448   }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
452 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
453 {
454   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
455   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
456   PetscErrorCode ierr;
457   PetscInt       l,*garray = mat->garray,diag;
458 
459   PetscFunctionBegin;
460   /* code only works for square matrices A */
461 
462   /* find size of row to the left of the diagonal part */
463   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
464   row  = row - diag;
465   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
466     if (garray[b->j[b->i[row]+l]] > diag) break;
467   }
468   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
469 
470   /* diagonal part */
471   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
472 
473   /* right of diagonal part */
474   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatSetValues_MPIAIJ"
480 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
481 {
482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
483   PetscScalar    value;
484   PetscErrorCode ierr;
485   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
486   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
487   PetscBool      roworiented = aij->roworiented;
488 
489   /* Some Variables required in the macro */
490   Mat        A                 = aij->A;
491   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
492   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
493   MatScalar  *aa               = a->a;
494   PetscBool  ignorezeroentries = a->ignorezeroentries;
495   Mat        B                 = aij->B;
496   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
497   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
498   MatScalar  *ba               = b->a;
499 
500   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
501   PetscInt  nonew;
502   MatScalar *ap1,*ap2;
503 
504   PetscFunctionBegin;
505   if (v) PetscValidScalarPointer(v,6);
506   for (i=0; i<m; i++) {
507     if (im[i] < 0) continue;
508 #if defined(PETSC_USE_DEBUG)
509     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
510 #endif
511     if (im[i] >= rstart && im[i] < rend) {
512       row      = im[i] - rstart;
513       lastcol1 = -1;
514       rp1      = aj + ai[row];
515       ap1      = aa + ai[row];
516       rmax1    = aimax[row];
517       nrow1    = ailen[row];
518       low1     = 0;
519       high1    = nrow1;
520       lastcol2 = -1;
521       rp2      = bj + bi[row];
522       ap2      = ba + bi[row];
523       rmax2    = bimax[row];
524       nrow2    = bilen[row];
525       low2     = 0;
526       high2    = nrow2;
527 
528       for (j=0; j<n; j++) {
529         if (v) {
530           if (roworiented) value = v[i*n+j];
531           else             value = v[i+j*m];
532         } else value = 0.0;
533         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
534         if (in[j] >= cstart && in[j] < cend) {
535           col   = in[j] - cstart;
536           nonew = a->nonew;
537           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
538         } else if (in[j] < 0) continue;
539 #if defined(PETSC_USE_DEBUG)
540         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
541 #endif
542         else {
543           if (mat->was_assembled) {
544             if (!aij->colmap) {
545               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
546             }
547 #if defined(PETSC_USE_CTABLE)
548             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
549             col--;
550 #else
551             col = aij->colmap[in[j]] - 1;
552 #endif
553             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
554               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
555               col  =  in[j];
556               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
557               B     = aij->B;
558               b     = (Mat_SeqAIJ*)B->data;
559               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
560               rp2   = bj + bi[row];
561               ap2   = ba + bi[row];
562               rmax2 = bimax[row];
563               nrow2 = bilen[row];
564               low2  = 0;
565               high2 = nrow2;
566               bm    = aij->B->rmap->n;
567               ba    = b->a;
568             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
569           } else col = in[j];
570           nonew = b->nonew;
571           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
572         }
573       }
574     } else {
575       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
576       if (!aij->donotstash) {
577         mat->assembled = PETSC_FALSE;
578         if (roworiented) {
579           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
580         } else {
581           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
582         }
583       }
584     }
585   }
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatGetValues_MPIAIJ"
591 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
592 {
593   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
594   PetscErrorCode ierr;
595   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
596   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
597 
598   PetscFunctionBegin;
599   for (i=0; i<m; i++) {
600     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
601     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
602     if (idxm[i] >= rstart && idxm[i] < rend) {
603       row = idxm[i] - rstart;
604       for (j=0; j<n; j++) {
605         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
606         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
607         if (idxn[j] >= cstart && idxn[j] < cend) {
608           col  = idxn[j] - cstart;
609           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
610         } else {
611           if (!aij->colmap) {
612             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
613           }
614 #if defined(PETSC_USE_CTABLE)
615           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
616           col--;
617 #else
618           col = aij->colmap[idxn[j]] - 1;
619 #endif
620           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
621           else {
622             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
623           }
624         }
625       }
626     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
627   }
628   PetscFunctionReturn(0);
629 }
630 
631 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
632 
633 #undef __FUNCT__
634 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
635 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
636 {
637   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
638   PetscErrorCode ierr;
639   PetscInt       nstash,reallocs;
640   InsertMode     addv;
641 
642   PetscFunctionBegin;
643   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
644 
645   /* make sure all processors are either in INSERTMODE or ADDMODE */
646   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
647   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
648   mat->insertmode = addv; /* in case this processor had no cache */
649 
650   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
651   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
652   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
653   PetscFunctionReturn(0);
654 }
655 
656 #undef __FUNCT__
657 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
658 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
659 {
660   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
661   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
662   PetscErrorCode ierr;
663   PetscMPIInt    n;
664   PetscInt       i,j,rstart,ncols,flg;
665   PetscInt       *row,*col;
666   PetscBool      other_disassembled;
667   PetscScalar    *val;
668   InsertMode     addv = mat->insertmode;
669 
670   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
671 
672   PetscFunctionBegin;
673   if (!aij->donotstash && !mat->nooffprocentries) {
674     while (1) {
675       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
676       if (!flg) break;
677 
678       for (i=0; i<n; ) {
679         /* Now identify the consecutive vals belonging to the same row */
680         for (j=i,rstart=row[j]; j<n; j++) {
681           if (row[j] != rstart) break;
682         }
683         if (j < n) ncols = j-i;
684         else       ncols = n-i;
685         /* Now assemble all these values with a single function call */
686         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
687 
688         i = j;
689       }
690     }
691     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
692   }
693   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
694   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
695 
696   /* determine if any processor has disassembled, if so we must
697      also disassemble ourselfs, in order that we may reassemble. */
698   /*
699      if nonzero structure of submatrix B cannot change then we know that
700      no processor disassembled thus we can skip this stuff
701   */
702   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
703     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
704     if (mat->was_assembled && !other_disassembled) {
705       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
706     }
707   }
708   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
709     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
710   }
711   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
712   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
714   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
715 
716   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
717 
718   aij->rowvalues = 0;
719 
720   /* used by MatAXPY() */
721   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
722   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
723 
724   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
725   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
731 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
732 {
733   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
734   PetscErrorCode ierr;
735 
736   PetscFunctionBegin;
737   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
738   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatZeroRows_MPIAIJ"
744 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
745 {
746   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
749   PetscInt          i,*owners = A->rmap->range;
750   PetscInt          *nprocs,j,idx,nsends,row;
751   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
752   PetscInt          *rvalues,count,base,slen,*source;
753   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
754   MPI_Comm          comm = ((PetscObject)A)->comm;
755   MPI_Request       *send_waits,*recv_waits;
756   MPI_Status        recv_status,*send_status;
757   const PetscScalar *xx;
758   PetscScalar       *bb;
759 #if defined(PETSC_DEBUG)
760   PetscBool found = PETSC_FALSE;
761 #endif
762 
763   PetscFunctionBegin;
764   /*  first count number of contributors to each processor */
765   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
766   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
767   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
768   j    = 0;
769   for (i=0; i<N; i++) {
770     if (lastidx > (idx = rows[i])) j = 0;
771     lastidx = idx;
772     for (; j<size; j++) {
773       if (idx >= owners[j] && idx < owners[j+1]) {
774         nprocs[2*j]++;
775         nprocs[2*j+1] = 1;
776         owner[i]      = j;
777 #if defined(PETSC_DEBUG)
778         found = PETSC_TRUE;
779 #endif
780         break;
781       }
782     }
783 #if defined(PETSC_DEBUG)
784     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
785     found = PETSC_FALSE;
786 #endif
787   }
788   nsends = 0;
789   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
790 
791   if (A->nooffproczerorows) {
792     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
793     nrecvs = nsends;
794     nmax   = N;
795   } else {
796     /* inform other processors of number of messages and max length*/
797     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
798   }
799 
800   /* post receives:   */
801   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
802   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
803   for (i=0; i<nrecvs; i++) {
804     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
805   }
806 
807   /* do sends:
808       1) starts[i] gives the starting index in svalues for stuff going to
809          the ith processor
810   */
811   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
812   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
813   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
814 
815   starts[0] = 0;
816   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
817   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
818 
819   starts[0] = 0;
820   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
821   count = 0;
822   for (i=0; i<size; i++) {
823     if (nprocs[2*i+1]) {
824       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
825     }
826   }
827   ierr = PetscFree(starts);CHKERRQ(ierr);
828 
829   base = owners[rank];
830 
831   /*  wait on receives */
832   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
833   count = nrecvs; slen = 0;
834   while (count) {
835     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
836     /* unpack receives into our local space */
837     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
838 
839     source[imdex] = recv_status.MPI_SOURCE;
840     lens[imdex]   = n;
841     slen         += n;
842     count--;
843   }
844   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
845 
846   /* move the data into the send scatter */
847   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
848   count = 0;
849   for (i=0; i<nrecvs; i++) {
850     values = rvalues + i*nmax;
851     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
852   }
853   ierr = PetscFree(rvalues);CHKERRQ(ierr);
854   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
855   ierr = PetscFree(owner);CHKERRQ(ierr);
856   ierr = PetscFree(nprocs);CHKERRQ(ierr);
857 
858   /* fix right hand side if needed */
859   if (x && b) {
860     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
861     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
862     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
863     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
864     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
865   }
866   /*
867         Zero the required rows. If the "diagonal block" of the matrix
868      is square and the user wishes to set the diagonal we use separate
869      code so that MatSetValues() is not called for each diagonal allocating
870      new memory, thus calling lots of mallocs and slowing things down.
871 
872   */
873   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
874   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
875   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
876     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
877   } else if (diag != 0.0) {
878     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
879     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
880     for (i = 0; i < slen; i++) {
881       row  = lrows[i] + rstart;
882       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
883     }
884     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
885     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
886   } else {
887     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
888   }
889   ierr = PetscFree(lrows);CHKERRQ(ierr);
890 
891   /* wait on sends */
892   if (nsends) {
893     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
894     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
895     ierr = PetscFree(send_status);CHKERRQ(ierr);
896   }
897   ierr = PetscFree(send_waits);CHKERRQ(ierr);
898   ierr = PetscFree(svalues);CHKERRQ(ierr);
899   PetscFunctionReturn(0);
900 }
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
904 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
905 {
906   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
907   PetscErrorCode    ierr;
908   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
909   PetscInt          i,*owners = A->rmap->range;
910   PetscInt          *nprocs,j,idx,nsends;
911   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
912   PetscInt          *rvalues,count,base,slen,*source;
913   PetscInt          *lens,*lrows,*values,m;
914   MPI_Comm          comm = ((PetscObject)A)->comm;
915   MPI_Request       *send_waits,*recv_waits;
916   MPI_Status        recv_status,*send_status;
917   const PetscScalar *xx;
918   PetscScalar       *bb,*mask;
919   Vec               xmask,lmask;
920   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
921   const PetscInt    *aj, *ii,*ridx;
922   PetscScalar       *aa;
923 #if defined(PETSC_DEBUG)
924   PetscBool found = PETSC_FALSE;
925 #endif
926 
927   PetscFunctionBegin;
928   /*  first count number of contributors to each processor */
929   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
930   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
931   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
932   j    = 0;
933   for (i=0; i<N; i++) {
934     if (lastidx > (idx = rows[i])) j = 0;
935     lastidx = idx;
936     for (; j<size; j++) {
937       if (idx >= owners[j] && idx < owners[j+1]) {
938         nprocs[2*j]++;
939         nprocs[2*j+1] = 1;
940         owner[i]      = j;
941 #if defined(PETSC_DEBUG)
942         found = PETSC_TRUE;
943 #endif
944         break;
945       }
946     }
947 #if defined(PETSC_DEBUG)
948     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
949     found = PETSC_FALSE;
950 #endif
951   }
952   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
953 
954   /* inform other processors of number of messages and max length*/
955   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
956 
957   /* post receives:   */
958   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
959   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
960   for (i=0; i<nrecvs; i++) {
961     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
962   }
963 
964   /* do sends:
965       1) starts[i] gives the starting index in svalues for stuff going to
966          the ith processor
967   */
968   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
969   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
970   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
971 
972   starts[0] = 0;
973   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
974   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
975 
976   starts[0] = 0;
977   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
978   count = 0;
979   for (i=0; i<size; i++) {
980     if (nprocs[2*i+1]) {
981       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
982     }
983   }
984   ierr = PetscFree(starts);CHKERRQ(ierr);
985 
986   base = owners[rank];
987 
988   /*  wait on receives */
989   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
990   count = nrecvs; slen = 0;
991   while (count) {
992     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
993     /* unpack receives into our local space */
994     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
995 
996     source[imdex] = recv_status.MPI_SOURCE;
997     lens[imdex]   = n;
998     slen         += n;
999     count--;
1000   }
1001   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1002 
1003   /* move the data into the send scatter */
1004   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1005   count = 0;
1006   for (i=0; i<nrecvs; i++) {
1007     values = rvalues + i*nmax;
1008     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1009   }
1010   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1011   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1012   ierr = PetscFree(owner);CHKERRQ(ierr);
1013   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1014   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1015 
1016   /* zero diagonal part of matrix */
1017   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1018 
1019   /* handle off diagonal part of matrix */
1020   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1021   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1022   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1023   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1024   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1025   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1027   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1028   if (x) {
1029     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1031     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1032     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1033   }
1034   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1035 
1036   /* remove zeroed rows of off diagonal matrix */
1037   ii = aij->i;
1038   for (i=0; i<slen; i++) {
1039     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1040   }
1041 
1042   /* loop over all elements of off process part of matrix zeroing removed columns*/
1043   if (aij->compressedrow.use) {
1044     m    = aij->compressedrow.nrows;
1045     ii   = aij->compressedrow.i;
1046     ridx = aij->compressedrow.rindex;
1047     for (i=0; i<m; i++) {
1048       n  = ii[i+1] - ii[i];
1049       aj = aij->j + ii[i];
1050       aa = aij->a + ii[i];
1051 
1052       for (j=0; j<n; j++) {
1053         if (PetscAbsScalar(mask[*aj])) {
1054           if (b) bb[*ridx] -= *aa*xx[*aj];
1055           *aa = 0.0;
1056         }
1057         aa++;
1058         aj++;
1059       }
1060       ridx++;
1061     }
1062   } else { /* do not use compressed row format */
1063     m = l->B->rmap->n;
1064     for (i=0; i<m; i++) {
1065       n  = ii[i+1] - ii[i];
1066       aj = aij->j + ii[i];
1067       aa = aij->a + ii[i];
1068       for (j=0; j<n; j++) {
1069         if (PetscAbsScalar(mask[*aj])) {
1070           if (b) bb[i] -= *aa*xx[*aj];
1071           *aa = 0.0;
1072         }
1073         aa++;
1074         aj++;
1075       }
1076     }
1077   }
1078   if (x) {
1079     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1080     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1081   }
1082   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1083   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1084   ierr = PetscFree(lrows);CHKERRQ(ierr);
1085 
1086   /* wait on sends */
1087   if (nsends) {
1088     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1089     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1090     ierr = PetscFree(send_status);CHKERRQ(ierr);
1091   }
1092   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1093   ierr = PetscFree(svalues);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 #undef __FUNCT__
1098 #define __FUNCT__ "MatMult_MPIAIJ"
1099 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1100 {
1101   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1102   PetscErrorCode ierr;
1103   PetscInt       nt;
1104 
1105   PetscFunctionBegin;
1106   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1107   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1108   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1109   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1110   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1111   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1117 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1118 {
1119   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1120   PetscErrorCode ierr;
1121 
1122   PetscFunctionBegin;
1123   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 #undef __FUNCT__
1128 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1129 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1130 {
1131   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1132   PetscErrorCode ierr;
1133 
1134   PetscFunctionBegin;
1135   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1136   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1137   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1138   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1144 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1145 {
1146   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1147   PetscErrorCode ierr;
1148   PetscBool      merged;
1149 
1150   PetscFunctionBegin;
1151   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1152   /* do nondiagonal part */
1153   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1154   if (!merged) {
1155     /* send it on its way */
1156     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157     /* do local part */
1158     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1159     /* receive remote parts: note this assumes the values are not actually */
1160     /* added in yy until the next line, */
1161     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1162   } else {
1163     /* do local part */
1164     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1165     /* send it on its way */
1166     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1167     /* values actually were received in the Begin() but we need to call this nop */
1168     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 EXTERN_C_BEGIN
1174 #undef __FUNCT__
1175 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1176 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1177 {
1178   MPI_Comm       comm;
1179   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1180   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1181   IS             Me,Notme;
1182   PetscErrorCode ierr;
1183   PetscInt       M,N,first,last,*notme,i;
1184   PetscMPIInt    size;
1185 
1186   PetscFunctionBegin;
1187   /* Easy test: symmetric diagonal block */
1188   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1189   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1190   if (!*f) PetscFunctionReturn(0);
1191   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1192   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1193   if (size == 1) PetscFunctionReturn(0);
1194 
1195   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1196   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1197   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1198   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1199   for (i=0; i<first; i++) notme[i] = i;
1200   for (i=last; i<M; i++) notme[i-last+first] = i;
1201   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1202   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1203   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1204   Aoff = Aoffs[0];
1205   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1206   Boff = Boffs[0];
1207   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1208   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1209   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1210   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1211   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1212   ierr = PetscFree(notme);CHKERRQ(ierr);
1213   PetscFunctionReturn(0);
1214 }
1215 EXTERN_C_END
1216 
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1219 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1220 {
1221   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1222   PetscErrorCode ierr;
1223 
1224   PetscFunctionBegin;
1225   /* do nondiagonal part */
1226   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1227   /* send it on its way */
1228   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1229   /* do local part */
1230   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1231   /* receive remote parts */
1232   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 /*
1237   This only works correctly for square matrices where the subblock A->A is the
1238    diagonal block
1239 */
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1242 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1243 {
1244   PetscErrorCode ierr;
1245   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1246 
1247   PetscFunctionBegin;
1248   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1249   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1250   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatScale_MPIAIJ"
1256 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1257 {
1258   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1259   PetscErrorCode ierr;
1260 
1261   PetscFunctionBegin;
1262   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1263   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #undef __FUNCT__
1268 #define __FUNCT__ "MatDestroy_MPIAIJ"
1269 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1270 {
1271   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1272   PetscErrorCode ierr;
1273 
1274   PetscFunctionBegin;
1275 #if defined(PETSC_USE_LOG)
1276   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1277 #endif
1278   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1279   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1280   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1281   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1282 #if defined(PETSC_USE_CTABLE)
1283   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1284 #else
1285   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1286 #endif
1287   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1288   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1289   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1290   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1291   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1292   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1293 
1294   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",NULL);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1308 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1309 {
1310   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1311   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1312   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1313   PetscErrorCode ierr;
1314   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1315   int            fd;
1316   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1317   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1318   PetscScalar    *column_values;
1319   PetscInt       message_count,flowcontrolcount;
1320   FILE           *file;
1321 
1322   PetscFunctionBegin;
1323   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1324   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1325   nz   = A->nz + B->nz;
1326   if (!rank) {
1327     header[0] = MAT_FILE_CLASSID;
1328     header[1] = mat->rmap->N;
1329     header[2] = mat->cmap->N;
1330 
1331     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1332     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1333     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1334     /* get largest number of rows any processor has */
1335     rlen  = mat->rmap->n;
1336     range = mat->rmap->range;
1337     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1338   } else {
1339     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340     rlen = mat->rmap->n;
1341   }
1342 
1343   /* load up the local row counts */
1344   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1345   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1346 
1347   /* store the row lengths to the file */
1348   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1349   if (!rank) {
1350     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1351     for (i=1; i<size; i++) {
1352       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1353       rlen = range[i+1] - range[i];
1354       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1355       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1356     }
1357     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1358   } else {
1359     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1360     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1361     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1362   }
1363   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1364 
1365   /* load up the local column indices */
1366   nzmax = nz; /* th processor needs space a largest processor needs */
1367   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1368   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1369   cnt   = 0;
1370   for (i=0; i<mat->rmap->n; i++) {
1371     for (j=B->i[i]; j<B->i[i+1]; j++) {
1372       if ((col = garray[B->j[j]]) > cstart) break;
1373       column_indices[cnt++] = col;
1374     }
1375     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1376     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1377   }
1378   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1379 
1380   /* store the column indices to the file */
1381   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1382   if (!rank) {
1383     MPI_Status status;
1384     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1385     for (i=1; i<size; i++) {
1386       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1387       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1388       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1389       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1390       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1391     }
1392     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1393   } else {
1394     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1395     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1396     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1397     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1398   }
1399   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1400 
1401   /* load up the local column values */
1402   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1403   cnt  = 0;
1404   for (i=0; i<mat->rmap->n; i++) {
1405     for (j=B->i[i]; j<B->i[i+1]; j++) {
1406       if (garray[B->j[j]] > cstart) break;
1407       column_values[cnt++] = B->a[j];
1408     }
1409     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1410     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1411   }
1412   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1413 
1414   /* store the column values to the file */
1415   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1416   if (!rank) {
1417     MPI_Status status;
1418     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1419     for (i=1; i<size; i++) {
1420       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1421       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1422       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1423       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1424       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1425     }
1426     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1427   } else {
1428     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1429     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1430     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1431     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1432   }
1433   ierr = PetscFree(column_values);CHKERRQ(ierr);
1434 
1435   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1436   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1442 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1443 {
1444   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1445   PetscErrorCode    ierr;
1446   PetscMPIInt       rank = aij->rank,size = aij->size;
1447   PetscBool         isdraw,iascii,isbinary;
1448   PetscViewer       sviewer;
1449   PetscViewerFormat format;
1450 
1451   PetscFunctionBegin;
1452   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1453   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1455   if (iascii) {
1456     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1457     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1458       MatInfo   info;
1459       PetscBool inodes;
1460 
1461       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1462       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1463       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1464       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1465       if (!inodes) {
1466         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1467                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1468       } else {
1469         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1470                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1471       }
1472       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1473       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1474       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1475       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1476       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1478       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1479       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1480       PetscFunctionReturn(0);
1481     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1482       PetscInt inodecount,inodelimit,*inodes;
1483       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1484       if (inodes) {
1485         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1486       } else {
1487         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1488       }
1489       PetscFunctionReturn(0);
1490     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1491       PetscFunctionReturn(0);
1492     }
1493   } else if (isbinary) {
1494     if (size == 1) {
1495       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1496       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1497     } else {
1498       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1499     }
1500     PetscFunctionReturn(0);
1501   } else if (isdraw) {
1502     PetscDraw draw;
1503     PetscBool isnull;
1504     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1505     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1506   }
1507 
1508   if (size == 1) {
1509     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1510     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1511   } else {
1512     /* assemble the entire matrix onto first processor. */
1513     Mat        A;
1514     Mat_SeqAIJ *Aloc;
1515     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1516     MatScalar  *a;
1517 
1518     if (mat->rmap->N > 1024) {
1519       PetscBool flg = PETSC_FALSE;
1520 
1521       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1522       if (!flg) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1523     }
1524 
1525     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1526     if (!rank) {
1527       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1528     } else {
1529       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1530     }
1531     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1532     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1533     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1534     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1535     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1536 
1537     /* copy over the A part */
1538     Aloc = (Mat_SeqAIJ*)aij->A->data;
1539     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1540     row  = mat->rmap->rstart;
1541     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1542     for (i=0; i<m; i++) {
1543       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1544       row++;
1545       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1546     }
1547     aj = Aloc->j;
1548     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1549 
1550     /* copy over the B part */
1551     Aloc = (Mat_SeqAIJ*)aij->B->data;
1552     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1553     row  = mat->rmap->rstart;
1554     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1555     ct   = cols;
1556     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1557     for (i=0; i<m; i++) {
1558       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1559       row++;
1560       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1561     }
1562     ierr = PetscFree(ct);CHKERRQ(ierr);
1563     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1564     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1565     /*
1566        Everyone has to call to draw the matrix since the graphics waits are
1567        synchronized across all processors that share the PetscDraw object
1568     */
1569     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1570     if (!rank) {
1571       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1572       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1573       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1574       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1575     }
1576     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1577     ierr = MatDestroy(&A);CHKERRQ(ierr);
1578   }
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatView_MPIAIJ"
1584 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1585 {
1586   PetscErrorCode ierr;
1587   PetscBool      iascii,isdraw,issocket,isbinary;
1588 
1589   PetscFunctionBegin;
1590   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1591   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1594   if (iascii || isdraw || isbinary || issocket) {
1595     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1596   }
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatSOR_MPIAIJ"
1602 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1603 {
1604   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1605   PetscErrorCode ierr;
1606   Vec            bb1 = 0;
1607   PetscBool      hasop;
1608 
1609   PetscFunctionBegin;
1610   if (flag == SOR_APPLY_UPPER) {
1611     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1612     PetscFunctionReturn(0);
1613   }
1614 
1615   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1616     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1617   }
1618 
1619   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1620     if (flag & SOR_ZERO_INITIAL_GUESS) {
1621       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1622       its--;
1623     }
1624 
1625     while (its--) {
1626       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1627       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1628 
1629       /* update rhs: bb1 = bb - B*x */
1630       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1631       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1632 
1633       /* local sweep */
1634       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1635     }
1636   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1637     if (flag & SOR_ZERO_INITIAL_GUESS) {
1638       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1639       its--;
1640     }
1641     while (its--) {
1642       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1643       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1644 
1645       /* update rhs: bb1 = bb - B*x */
1646       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1647       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1648 
1649       /* local sweep */
1650       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1651     }
1652   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1653     if (flag & SOR_ZERO_INITIAL_GUESS) {
1654       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1655       its--;
1656     }
1657     while (its--) {
1658       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1659       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660 
1661       /* update rhs: bb1 = bb - B*x */
1662       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1663       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1664 
1665       /* local sweep */
1666       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1667     }
1668   } else if (flag & SOR_EISENSTAT) {
1669     Vec xx1;
1670 
1671     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1672     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1673 
1674     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1675     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1676     if (!mat->diag) {
1677       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1678       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1679     }
1680     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1681     if (hasop) {
1682       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1683     } else {
1684       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1685     }
1686     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1687 
1688     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1689 
1690     /* local sweep */
1691     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1692     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1693     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1694   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1695 
1696   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatPermute_MPIAIJ"
1702 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1703 {
1704   Mat            aA,aB,Aperm;
1705   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1706   PetscScalar    *aa,*ba;
1707   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1708   PetscSF        rowsf,sf;
1709   IS             parcolp = NULL;
1710   PetscBool      done;
1711   PetscErrorCode ierr;
1712 
1713   PetscFunctionBegin;
1714   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1715   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1716   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1717   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1718 
1719   /* Invert row permutation to find out where my rows should go */
1720   ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr);
1721   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1722   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1723   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1724   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1725   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1726 
1727   /* Invert column permutation to find out where my columns should go */
1728   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1729   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1730   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1731   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1732   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1733   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1734   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1735 
1736   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1737   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1738   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1739 
1740   /* Find out where my gcols should go */
1741   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1742   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1743   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1744   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1745   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1746   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1747   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1748   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1749 
1750   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1751   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1752   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1753   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1754   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1755   for (i=0; i<m; i++) {
1756     PetscInt row = rdest[i],rowner;
1757     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1758     for (j=ai[i]; j<ai[i+1]; j++) {
1759       PetscInt cowner,col = cdest[aj[j]];
1760       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1761       if (rowner == cowner) dnnz[i]++;
1762       else onnz[i]++;
1763     }
1764     for (j=bi[i]; j<bi[i+1]; j++) {
1765       PetscInt cowner,col = gcdest[bj[j]];
1766       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1767       if (rowner == cowner) dnnz[i]++;
1768       else onnz[i]++;
1769     }
1770   }
1771   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1772   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1773   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1774   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1775   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1777   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1778 
1779   ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1780   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1781   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1782   for (i=0; i<m; i++) {
1783     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1784     PetscInt rowlen;
1785     rowlen = ai[i+1] - ai[i];
1786     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1787     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1788     rowlen = bi[i+1] - bi[i];
1789     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1790     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1791   }
1792   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1793   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1794   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1795   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1796   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1797   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1798   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1799   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1800   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1801   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1802   *B = Aperm;
1803   PetscFunctionReturn(0);
1804 }
1805 
1806 #undef __FUNCT__
1807 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1808 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1809 {
1810   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1811   Mat            A    = mat->A,B = mat->B;
1812   PetscErrorCode ierr;
1813   PetscReal      isend[5],irecv[5];
1814 
1815   PetscFunctionBegin;
1816   info->block_size = 1.0;
1817   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1818 
1819   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1820   isend[3] = info->memory;  isend[4] = info->mallocs;
1821 
1822   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1823 
1824   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1825   isend[3] += info->memory;  isend[4] += info->mallocs;
1826   if (flag == MAT_LOCAL) {
1827     info->nz_used      = isend[0];
1828     info->nz_allocated = isend[1];
1829     info->nz_unneeded  = isend[2];
1830     info->memory       = isend[3];
1831     info->mallocs      = isend[4];
1832   } else if (flag == MAT_GLOBAL_MAX) {
1833     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1834 
1835     info->nz_used      = irecv[0];
1836     info->nz_allocated = irecv[1];
1837     info->nz_unneeded  = irecv[2];
1838     info->memory       = irecv[3];
1839     info->mallocs      = irecv[4];
1840   } else if (flag == MAT_GLOBAL_SUM) {
1841     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1842 
1843     info->nz_used      = irecv[0];
1844     info->nz_allocated = irecv[1];
1845     info->nz_unneeded  = irecv[2];
1846     info->memory       = irecv[3];
1847     info->mallocs      = irecv[4];
1848   }
1849   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1850   info->fill_ratio_needed = 0;
1851   info->factor_mallocs    = 0;
1852   PetscFunctionReturn(0);
1853 }
1854 
1855 #undef __FUNCT__
1856 #define __FUNCT__ "MatSetOption_MPIAIJ"
1857 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1858 {
1859   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1860   PetscErrorCode ierr;
1861 
1862   PetscFunctionBegin;
1863   switch (op) {
1864   case MAT_NEW_NONZERO_LOCATIONS:
1865   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1866   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1867   case MAT_KEEP_NONZERO_PATTERN:
1868   case MAT_NEW_NONZERO_LOCATION_ERR:
1869   case MAT_USE_INODES:
1870   case MAT_IGNORE_ZERO_ENTRIES:
1871     MatCheckPreallocated(A,1);
1872     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1873     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1874     break;
1875   case MAT_ROW_ORIENTED:
1876     a->roworiented = flg;
1877 
1878     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1879     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1880     break;
1881   case MAT_NEW_DIAGONALS:
1882     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1883     break;
1884   case MAT_IGNORE_OFF_PROC_ENTRIES:
1885     a->donotstash = flg;
1886     break;
1887   case MAT_SPD:
1888     A->spd_set = PETSC_TRUE;
1889     A->spd     = flg;
1890     if (flg) {
1891       A->symmetric                  = PETSC_TRUE;
1892       A->structurally_symmetric     = PETSC_TRUE;
1893       A->symmetric_set              = PETSC_TRUE;
1894       A->structurally_symmetric_set = PETSC_TRUE;
1895     }
1896     break;
1897   case MAT_SYMMETRIC:
1898     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1899     break;
1900   case MAT_STRUCTURALLY_SYMMETRIC:
1901     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1902     break;
1903   case MAT_HERMITIAN:
1904     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1905     break;
1906   case MAT_SYMMETRY_ETERNAL:
1907     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1908     break;
1909   default:
1910     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1911   }
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 #undef __FUNCT__
1916 #define __FUNCT__ "MatGetRow_MPIAIJ"
1917 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1918 {
1919   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1920   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1921   PetscErrorCode ierr;
1922   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1923   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1924   PetscInt       *cmap,*idx_p;
1925 
1926   PetscFunctionBegin;
1927   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1928   mat->getrowactive = PETSC_TRUE;
1929 
1930   if (!mat->rowvalues && (idx || v)) {
1931     /*
1932         allocate enough space to hold information from the longest row.
1933     */
1934     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1935     PetscInt   max = 1,tmp;
1936     for (i=0; i<matin->rmap->n; i++) {
1937       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1938       if (max < tmp) max = tmp;
1939     }
1940     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1941   }
1942 
1943   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1944   lrow = row - rstart;
1945 
1946   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1947   if (!v)   {pvA = 0; pvB = 0;}
1948   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1949   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1950   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1951   nztot = nzA + nzB;
1952 
1953   cmap = mat->garray;
1954   if (v  || idx) {
1955     if (nztot) {
1956       /* Sort by increasing column numbers, assuming A and B already sorted */
1957       PetscInt imark = -1;
1958       if (v) {
1959         *v = v_p = mat->rowvalues;
1960         for (i=0; i<nzB; i++) {
1961           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1962           else break;
1963         }
1964         imark = i;
1965         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1966         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1967       }
1968       if (idx) {
1969         *idx = idx_p = mat->rowindices;
1970         if (imark > -1) {
1971           for (i=0; i<imark; i++) {
1972             idx_p[i] = cmap[cworkB[i]];
1973           }
1974         } else {
1975           for (i=0; i<nzB; i++) {
1976             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1977             else break;
1978           }
1979           imark = i;
1980         }
1981         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1982         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1983       }
1984     } else {
1985       if (idx) *idx = 0;
1986       if (v)   *v   = 0;
1987     }
1988   }
1989   *nz  = nztot;
1990   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1991   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1992   PetscFunctionReturn(0);
1993 }
1994 
1995 #undef __FUNCT__
1996 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1997 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1998 {
1999   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2000 
2001   PetscFunctionBegin;
2002   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2003   aij->getrowactive = PETSC_FALSE;
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 #undef __FUNCT__
2008 #define __FUNCT__ "MatNorm_MPIAIJ"
2009 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2010 {
2011   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2012   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2013   PetscErrorCode ierr;
2014   PetscInt       i,j,cstart = mat->cmap->rstart;
2015   PetscReal      sum = 0.0;
2016   MatScalar      *v;
2017 
2018   PetscFunctionBegin;
2019   if (aij->size == 1) {
2020     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2021   } else {
2022     if (type == NORM_FROBENIUS) {
2023       v = amat->a;
2024       for (i=0; i<amat->nz; i++) {
2025         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2026       }
2027       v = bmat->a;
2028       for (i=0; i<bmat->nz; i++) {
2029         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2030       }
2031       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2032       *norm = PetscSqrtReal(*norm);
2033     } else if (type == NORM_1) { /* max column norm */
2034       PetscReal *tmp,*tmp2;
2035       PetscInt  *jj,*garray = aij->garray;
2036       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2037       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2038       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2039       *norm = 0.0;
2040       v     = amat->a; jj = amat->j;
2041       for (j=0; j<amat->nz; j++) {
2042         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2043       }
2044       v = bmat->a; jj = bmat->j;
2045       for (j=0; j<bmat->nz; j++) {
2046         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2047       }
2048       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2049       for (j=0; j<mat->cmap->N; j++) {
2050         if (tmp2[j] > *norm) *norm = tmp2[j];
2051       }
2052       ierr = PetscFree(tmp);CHKERRQ(ierr);
2053       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2054     } else if (type == NORM_INFINITY) { /* max row norm */
2055       PetscReal ntemp = 0.0;
2056       for (j=0; j<aij->A->rmap->n; j++) {
2057         v   = amat->a + amat->i[j];
2058         sum = 0.0;
2059         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2060           sum += PetscAbsScalar(*v); v++;
2061         }
2062         v = bmat->a + bmat->i[j];
2063         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2064           sum += PetscAbsScalar(*v); v++;
2065         }
2066         if (sum > ntemp) ntemp = sum;
2067       }
2068       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2069     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2070   }
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 #undef __FUNCT__
2075 #define __FUNCT__ "MatTranspose_MPIAIJ"
2076 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2077 {
2078   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2079   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2080   PetscErrorCode ierr;
2081   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2082   PetscInt       cstart = A->cmap->rstart,ncol;
2083   Mat            B;
2084   MatScalar      *array;
2085 
2086   PetscFunctionBegin;
2087   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2088 
2089   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2090   ai = Aloc->i; aj = Aloc->j;
2091   bi = Bloc->i; bj = Bloc->j;
2092   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2093     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2094     PetscSFNode          *oloc;
2095     PETSC_UNUSED PetscSF sf;
2096 
2097     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2098     /* compute d_nnz for preallocation */
2099     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2100     for (i=0; i<ai[ma]; i++) {
2101       d_nnz[aj[i]]++;
2102       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2103     }
2104     /* compute local off-diagonal contributions */
2105     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2106     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2107     /* map those to global */
2108     ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
2109     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2110     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2111     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2112     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2113     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2114     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2115 
2116     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2117     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2118     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2119     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2120     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2121     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2122   } else {
2123     B    = *matout;
2124     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2125     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2126   }
2127 
2128   /* copy over the A part */
2129   array = Aloc->a;
2130   row   = A->rmap->rstart;
2131   for (i=0; i<ma; i++) {
2132     ncol = ai[i+1]-ai[i];
2133     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2134     row++;
2135     array += ncol; aj += ncol;
2136   }
2137   aj = Aloc->j;
2138   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2139 
2140   /* copy over the B part */
2141   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2142   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2143   array = Bloc->a;
2144   row   = A->rmap->rstart;
2145   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2146   cols_tmp = cols;
2147   for (i=0; i<mb; i++) {
2148     ncol = bi[i+1]-bi[i];
2149     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2150     row++;
2151     array += ncol; cols_tmp += ncol;
2152   }
2153   ierr = PetscFree(cols);CHKERRQ(ierr);
2154 
2155   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2156   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2157   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2158     *matout = B;
2159   } else {
2160     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2161   }
2162   PetscFunctionReturn(0);
2163 }
2164 
2165 #undef __FUNCT__
2166 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2167 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2168 {
2169   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2170   Mat            a    = aij->A,b = aij->B;
2171   PetscErrorCode ierr;
2172   PetscInt       s1,s2,s3;
2173 
2174   PetscFunctionBegin;
2175   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2176   if (rr) {
2177     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2178     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2179     /* Overlap communication with computation. */
2180     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2181   }
2182   if (ll) {
2183     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2184     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2185     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2186   }
2187   /* scale  the diagonal block */
2188   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2189 
2190   if (rr) {
2191     /* Do a scatter end and then right scale the off-diagonal block */
2192     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2193     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2194   }
2195   PetscFunctionReturn(0);
2196 }
2197 
2198 #undef __FUNCT__
2199 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2200 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2201 {
2202   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2203   PetscErrorCode ierr;
2204 
2205   PetscFunctionBegin;
2206   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2207   PetscFunctionReturn(0);
2208 }
2209 
2210 #undef __FUNCT__
2211 #define __FUNCT__ "MatEqual_MPIAIJ"
2212 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2213 {
2214   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2215   Mat            a,b,c,d;
2216   PetscBool      flg;
2217   PetscErrorCode ierr;
2218 
2219   PetscFunctionBegin;
2220   a = matA->A; b = matA->B;
2221   c = matB->A; d = matB->B;
2222 
2223   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2224   if (flg) {
2225     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2226   }
2227   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 #undef __FUNCT__
2232 #define __FUNCT__ "MatCopy_MPIAIJ"
2233 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2234 {
2235   PetscErrorCode ierr;
2236   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2237   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2238 
2239   PetscFunctionBegin;
2240   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2241   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2242     /* because of the column compression in the off-processor part of the matrix a->B,
2243        the number of columns in a->B and b->B may be different, hence we cannot call
2244        the MatCopy() directly on the two parts. If need be, we can provide a more
2245        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2246        then copying the submatrices */
2247     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2248   } else {
2249     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2250     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2251   }
2252   PetscFunctionReturn(0);
2253 }
2254 
2255 #undef __FUNCT__
2256 #define __FUNCT__ "MatSetUp_MPIAIJ"
2257 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2258 {
2259   PetscErrorCode ierr;
2260 
2261   PetscFunctionBegin;
2262   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2263   PetscFunctionReturn(0);
2264 }
2265 
2266 #undef __FUNCT__
2267 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2268 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2269 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2270 {
2271   PetscInt       i,m=Y->rmap->N;
2272   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2273   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2274   const PetscInt *xi = x->i,*yi = y->i;
2275 
2276   PetscFunctionBegin;
2277   /* Set the number of nonzeros in the new matrix */
2278   for (i=0; i<m; i++) {
2279     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2280     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2281     nnz[i] = 0;
2282     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2283       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2284       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2285       nnz[i]++;
2286     }
2287     for (; k<nzy; k++) nnz[i]++;
2288   }
2289   PetscFunctionReturn(0);
2290 }
2291 
2292 #undef __FUNCT__
2293 #define __FUNCT__ "MatAXPY_MPIAIJ"
2294 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2295 {
2296   PetscErrorCode ierr;
2297   PetscInt       i;
2298   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2299   PetscBLASInt   bnz,one=1;
2300   Mat_SeqAIJ     *x,*y;
2301 
2302   PetscFunctionBegin;
2303   if (str == SAME_NONZERO_PATTERN) {
2304     PetscScalar alpha = a;
2305     x    = (Mat_SeqAIJ*)xx->A->data;
2306     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2307     y    = (Mat_SeqAIJ*)yy->A->data;
2308     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2309     x    = (Mat_SeqAIJ*)xx->B->data;
2310     y    = (Mat_SeqAIJ*)yy->B->data;
2311     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2312     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2313   } else if (str == SUBSET_NONZERO_PATTERN) {
2314     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2315 
2316     x = (Mat_SeqAIJ*)xx->B->data;
2317     y = (Mat_SeqAIJ*)yy->B->data;
2318     if (y->xtoy && y->XtoY != xx->B) {
2319       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2320       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2321     }
2322     if (!y->xtoy) { /* get xtoy */
2323       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2324       y->XtoY = xx->B;
2325       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2326     }
2327     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2328   } else {
2329     Mat      B;
2330     PetscInt *nnz_d,*nnz_o;
2331     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2332     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2333     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2334     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2335     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2336     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2337     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2338     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2339     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2340     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2341     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2342     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2343     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2344     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2345   }
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2350 
2351 #undef __FUNCT__
2352 #define __FUNCT__ "MatConjugate_MPIAIJ"
2353 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2354 {
2355 #if defined(PETSC_USE_COMPLEX)
2356   PetscErrorCode ierr;
2357   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2358 
2359   PetscFunctionBegin;
2360   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2361   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2362 #else
2363   PetscFunctionBegin;
2364 #endif
2365   PetscFunctionReturn(0);
2366 }
2367 
2368 #undef __FUNCT__
2369 #define __FUNCT__ "MatRealPart_MPIAIJ"
2370 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2371 {
2372   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2373   PetscErrorCode ierr;
2374 
2375   PetscFunctionBegin;
2376   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2377   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2378   PetscFunctionReturn(0);
2379 }
2380 
2381 #undef __FUNCT__
2382 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2383 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2384 {
2385   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2386   PetscErrorCode ierr;
2387 
2388   PetscFunctionBegin;
2389   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2390   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 #if defined(PETSC_HAVE_PBGL)
2395 
2396 #include <boost/parallel/mpi/bsp_process_group.hpp>
2397 #include <boost/graph/distributed/ilu_default_graph.hpp>
2398 #include <boost/graph/distributed/ilu_0_block.hpp>
2399 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2400 #include <boost/graph/distributed/petsc/interface.hpp>
2401 #include <boost/multi_array.hpp>
2402 #include <boost/parallel/distributed_property_map->hpp>
2403 
2404 #undef __FUNCT__
2405 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2406 /*
2407   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2408 */
2409 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2410 {
2411   namespace petsc = boost::distributed::petsc;
2412 
2413   namespace graph_dist = boost::graph::distributed;
2414   using boost::graph::distributed::ilu_default::process_group_type;
2415   using boost::graph::ilu_permuted;
2416 
2417   PetscBool      row_identity, col_identity;
2418   PetscContainer c;
2419   PetscInt       m, n, M, N;
2420   PetscErrorCode ierr;
2421 
2422   PetscFunctionBegin;
2423   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2424   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2425   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2426   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2427 
2428   process_group_type pg;
2429   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2430   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2431   lgraph_type& level_graph = *lgraph_p;
2432   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2433 
2434   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2435   ilu_permuted(level_graph);
2436 
2437   /* put together the new matrix */
2438   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2439   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2440   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2441   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2442   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2443   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2444   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2445   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2446 
2447   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
2448   ierr = PetscContainerSetPointer(c, lgraph_p);
2449   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2450   ierr = PetscContainerDestroy(&c);
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 #undef __FUNCT__
2455 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2456 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2457 {
2458   PetscFunctionBegin;
2459   PetscFunctionReturn(0);
2460 }
2461 
2462 #undef __FUNCT__
2463 #define __FUNCT__ "MatSolve_MPIAIJ"
2464 /*
2465   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2466 */
2467 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2468 {
2469   namespace graph_dist = boost::graph::distributed;
2470 
2471   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2472   lgraph_type    *lgraph_p;
2473   PetscContainer c;
2474   PetscErrorCode ierr;
2475 
2476   PetscFunctionBegin;
2477   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2478   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2479   ierr = VecCopy(b, x);CHKERRQ(ierr);
2480 
2481   PetscScalar *array_x;
2482   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2483   PetscInt sx;
2484   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2485 
2486   PetscScalar *array_b;
2487   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2488   PetscInt sb;
2489   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2490 
2491   lgraph_type& level_graph = *lgraph_p;
2492   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2493 
2494   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2495   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2496   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2497 
2498   typedef boost::iterator_property_map<array_ref_type::iterator,
2499                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2500   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2501   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2502 
2503   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2504   PetscFunctionReturn(0);
2505 }
2506 #endif
2507 
2508 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2509   PetscInt    nzlocal,nsends,nrecvs;
2510   PetscMPIInt *send_rank,*recv_rank;
2511   PetscInt    *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2512   PetscScalar *sbuf_a,**rbuf_a;
2513   PetscErrorCode (*Destroy)(Mat);
2514 } Mat_Redundant;
2515 
2516 #undef __FUNCT__
2517 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2518 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2519 {
2520   PetscErrorCode ierr;
2521   Mat_Redundant  *redund=(Mat_Redundant*)ptr;
2522   PetscInt       i;
2523 
2524   PetscFunctionBegin;
2525   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2526   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2527   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2528   for (i=0; i<redund->nrecvs; i++) {
2529     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2530     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2531   }
2532   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2533   ierr = PetscFree(redund);CHKERRQ(ierr);
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 #undef __FUNCT__
2538 #define __FUNCT__ "MatDestroy_MatRedundant"
2539 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2540 {
2541   PetscErrorCode ierr;
2542   PetscContainer container;
2543   Mat_Redundant  *redund=NULL;
2544 
2545   PetscFunctionBegin;
2546   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2547   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2548   ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2549 
2550   A->ops->destroy = redund->Destroy;
2551 
2552   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2553   if (A->ops->destroy) {
2554     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2555   }
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 #undef __FUNCT__
2560 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2561 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2562 {
2563   PetscMPIInt    rank,size;
2564   MPI_Comm       comm=((PetscObject)mat)->comm;
2565   PetscErrorCode ierr;
2566   PetscInt       nsends    = 0,nrecvs=0,i,rownz_max=0;
2567   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL;
2568   PetscInt       *rowrange = mat->rmap->range;
2569   Mat_MPIAIJ     *aij      = (Mat_MPIAIJ*)mat->data;
2570   Mat            A         = aij->A,B=aij->B,C=*matredundant;
2571   Mat_SeqAIJ     *a        = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2572   PetscScalar    *sbuf_a;
2573   PetscInt       nzlocal=a->nz+b->nz;
2574   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2575   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2576   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2577   MatScalar      *aworkA,*aworkB;
2578   PetscScalar    *vals;
2579   PetscMPIInt    tag1,tag2,tag3,imdex;
2580   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2581   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2582   MPI_Status     recv_status,*send_status;
2583   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2584   PetscInt       **rbuf_j=NULL;
2585   PetscScalar    **rbuf_a=NULL;
2586   Mat_Redundant  *redund =NULL;
2587   PetscContainer container;
2588 
2589   PetscFunctionBegin;
2590   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2591   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2592 
2593   if (reuse == MAT_REUSE_MATRIX) {
2594     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2595     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2596     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2597     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2598     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2599     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2600     ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2601     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2602 
2603     nsends    = redund->nsends;
2604     nrecvs    = redund->nrecvs;
2605     send_rank = redund->send_rank;
2606     recv_rank = redund->recv_rank;
2607     sbuf_nz   = redund->sbuf_nz;
2608     rbuf_nz   = redund->rbuf_nz;
2609     sbuf_j    = redund->sbuf_j;
2610     sbuf_a    = redund->sbuf_a;
2611     rbuf_j    = redund->rbuf_j;
2612     rbuf_a    = redund->rbuf_a;
2613   }
2614 
2615   if (reuse == MAT_INITIAL_MATRIX) {
2616     PetscMPIInt subrank,subsize;
2617     PetscInt    nleftover,np_subcomm;
2618     /* get the destination processors' id send_rank, nsends and nrecvs */
2619     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2620     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2621     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2622 
2623     np_subcomm = size/nsubcomm;
2624     nleftover  = size - nsubcomm*np_subcomm;
2625 
2626     nsends = 0; nrecvs = 0;
2627     for (i=0; i<size; i++) { /* i=rank*/
2628       if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */
2629         send_rank[nsends]   = i; nsends++;
2630         recv_rank[nrecvs++] = i;
2631       }
2632     }
2633     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2634       i = size-nleftover-1;
2635       j = 0;
2636       while (j < nsubcomm - nleftover) {
2637         send_rank[nsends++] = i;
2638         i--; j++;
2639       }
2640     }
2641 
2642     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2643       for (i=0; i<nleftover; i++) {
2644         recv_rank[nrecvs++] = size-nleftover+i;
2645       }
2646     }
2647 
2648     /* allocate sbuf_j, sbuf_a */
2649     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2650     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2651     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2652   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2653 
2654   /* copy mat's local entries into the buffers */
2655   if (reuse == MAT_INITIAL_MATRIX) {
2656     rownz_max = 0;
2657     rptr      = sbuf_j;
2658     cols      = sbuf_j + rend-rstart + 1;
2659     vals      = sbuf_a;
2660     rptr[0]   = 0;
2661     for (i=0; i<rend-rstart; i++) {
2662       row    = i + rstart;
2663       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2664       ncols  = nzA + nzB;
2665       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2666       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2667       /* load the column indices for this row into cols */
2668       lwrite = 0;
2669       for (l=0; l<nzB; l++) {
2670         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2671           vals[lwrite]   = aworkB[l];
2672           cols[lwrite++] = ctmp;
2673         }
2674       }
2675       for (l=0; l<nzA; l++) {
2676         vals[lwrite]   = aworkA[l];
2677         cols[lwrite++] = cstart + cworkA[l];
2678       }
2679       for (l=0; l<nzB; l++) {
2680         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2681           vals[lwrite]   = aworkB[l];
2682           cols[lwrite++] = ctmp;
2683         }
2684       }
2685       vals     += ncols;
2686       cols     += ncols;
2687       rptr[i+1] = rptr[i] + ncols;
2688       if (rownz_max < ncols) rownz_max = ncols;
2689     }
2690     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2691   } else { /* only copy matrix values into sbuf_a */
2692     rptr    = sbuf_j;
2693     vals    = sbuf_a;
2694     rptr[0] = 0;
2695     for (i=0; i<rend-rstart; i++) {
2696       row    = i + rstart;
2697       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2698       ncols  = nzA + nzB;
2699       cworkB = b->j + b->i[i];
2700       aworkA = a->a + a->i[i];
2701       aworkB = b->a + b->i[i];
2702       lwrite = 0;
2703       for (l=0; l<nzB; l++) {
2704         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2705       }
2706       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2707       for (l=0; l<nzB; l++) {
2708         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2709       }
2710       vals     += ncols;
2711       rptr[i+1] = rptr[i] + ncols;
2712     }
2713   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2714 
2715   /* send nzlocal to others, and recv other's nzlocal */
2716   /*--------------------------------------------------*/
2717   if (reuse == MAT_INITIAL_MATRIX) {
2718     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2719 
2720     s_waits2 = s_waits3 + nsends;
2721     s_waits1 = s_waits2 + nsends;
2722     r_waits1 = s_waits1 + nsends;
2723     r_waits2 = r_waits1 + nrecvs;
2724     r_waits3 = r_waits2 + nrecvs;
2725   } else {
2726     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2727 
2728     r_waits3 = s_waits3 + nsends;
2729   }
2730 
2731   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2732   if (reuse == MAT_INITIAL_MATRIX) {
2733     /* get new tags to keep the communication clean */
2734     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2735     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2736     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2737 
2738     /* post receives of other's nzlocal */
2739     for (i=0; i<nrecvs; i++) {
2740       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2741     }
2742     /* send nzlocal to others */
2743     for (i=0; i<nsends; i++) {
2744       sbuf_nz[i] = nzlocal;
2745       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2746     }
2747     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2748     count = nrecvs;
2749     while (count) {
2750       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2751 
2752       recv_rank[imdex] = recv_status.MPI_SOURCE;
2753       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2754       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2755 
2756       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2757 
2758       rbuf_nz[imdex] += i + 2;
2759 
2760       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2761       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2762       count--;
2763     }
2764     /* wait on sends of nzlocal */
2765     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2766     /* send mat->i,j to others, and recv from other's */
2767     /*------------------------------------------------*/
2768     for (i=0; i<nsends; i++) {
2769       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2770       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2771     }
2772     /* wait on receives of mat->i,j */
2773     /*------------------------------*/
2774     count = nrecvs;
2775     while (count) {
2776       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2777       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2778       count--;
2779     }
2780     /* wait on sends of mat->i,j */
2781     /*---------------------------*/
2782     if (nsends) {
2783       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2784     }
2785   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2786 
2787   /* post receives, send and receive mat->a */
2788   /*----------------------------------------*/
2789   for (imdex=0; imdex<nrecvs; imdex++) {
2790     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2791   }
2792   for (i=0; i<nsends; i++) {
2793     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2794   }
2795   count = nrecvs;
2796   while (count) {
2797     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2798     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2799     count--;
2800   }
2801   if (nsends) {
2802     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2803   }
2804 
2805   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2806 
2807   /* create redundant matrix */
2808   /*-------------------------*/
2809   if (reuse == MAT_INITIAL_MATRIX) {
2810     /* compute rownz_max for preallocation */
2811     for (imdex=0; imdex<nrecvs; imdex++) {
2812       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2813       rptr = rbuf_j[imdex];
2814       for (i=0; i<j; i++) {
2815         ncols = rptr[i+1] - rptr[i];
2816         if (rownz_max < ncols) rownz_max = ncols;
2817       }
2818     }
2819 
2820     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2821     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2822     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2823     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2824     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2825     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2826   } else {
2827     C = *matredundant;
2828   }
2829 
2830   /* insert local matrix entries */
2831   rptr = sbuf_j;
2832   cols = sbuf_j + rend-rstart + 1;
2833   vals = sbuf_a;
2834   for (i=0; i<rend-rstart; i++) {
2835     row   = i + rstart;
2836     ncols = rptr[i+1] - rptr[i];
2837     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2838     vals += ncols;
2839     cols += ncols;
2840   }
2841   /* insert received matrix entries */
2842   for (imdex=0; imdex<nrecvs; imdex++) {
2843     rstart = rowrange[recv_rank[imdex]];
2844     rend   = rowrange[recv_rank[imdex]+1];
2845     rptr   = rbuf_j[imdex];
2846     cols   = rbuf_j[imdex] + rend-rstart + 1;
2847     vals   = rbuf_a[imdex];
2848     for (i=0; i<rend-rstart; i++) {
2849       row   = i + rstart;
2850       ncols = rptr[i+1] - rptr[i];
2851       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2852       vals += ncols;
2853       cols += ncols;
2854     }
2855   }
2856   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2857   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2858   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2859   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2860   if (reuse == MAT_INITIAL_MATRIX) {
2861     PetscContainer container;
2862     *matredundant = C;
2863     /* create a supporting struct and attach it to C for reuse */
2864     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2865     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2866     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2867     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2868     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2869     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2870 
2871     redund->nzlocal   = nzlocal;
2872     redund->nsends    = nsends;
2873     redund->nrecvs    = nrecvs;
2874     redund->send_rank = send_rank;
2875     redund->recv_rank = recv_rank;
2876     redund->sbuf_nz   = sbuf_nz;
2877     redund->rbuf_nz   = rbuf_nz;
2878     redund->sbuf_j    = sbuf_j;
2879     redund->sbuf_a    = sbuf_a;
2880     redund->rbuf_j    = rbuf_j;
2881     redund->rbuf_a    = rbuf_a;
2882 
2883     redund->Destroy = C->ops->destroy;
2884     C->ops->destroy = MatDestroy_MatRedundant;
2885   }
2886   PetscFunctionReturn(0);
2887 }
2888 
2889 #undef __FUNCT__
2890 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2891 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2892 {
2893   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2894   PetscErrorCode ierr;
2895   PetscInt       i,*idxb = 0;
2896   PetscScalar    *va,*vb;
2897   Vec            vtmp;
2898 
2899   PetscFunctionBegin;
2900   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2901   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2902   if (idx) {
2903     for (i=0; i<A->rmap->n; i++) {
2904       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2905     }
2906   }
2907 
2908   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2909   if (idx) {
2910     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2911   }
2912   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2913   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2914 
2915   for (i=0; i<A->rmap->n; i++) {
2916     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2917       va[i] = vb[i];
2918       if (idx) idx[i] = a->garray[idxb[i]];
2919     }
2920   }
2921 
2922   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2923   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2924   ierr = PetscFree(idxb);CHKERRQ(ierr);
2925   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2926   PetscFunctionReturn(0);
2927 }
2928 
2929 #undef __FUNCT__
2930 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2931 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2932 {
2933   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2934   PetscErrorCode ierr;
2935   PetscInt       i,*idxb = 0;
2936   PetscScalar    *va,*vb;
2937   Vec            vtmp;
2938 
2939   PetscFunctionBegin;
2940   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2941   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2942   if (idx) {
2943     for (i=0; i<A->cmap->n; i++) {
2944       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2945     }
2946   }
2947 
2948   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2949   if (idx) {
2950     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2951   }
2952   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2953   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2954 
2955   for (i=0; i<A->rmap->n; i++) {
2956     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2957       va[i] = vb[i];
2958       if (idx) idx[i] = a->garray[idxb[i]];
2959     }
2960   }
2961 
2962   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2963   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2964   ierr = PetscFree(idxb);CHKERRQ(ierr);
2965   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2966   PetscFunctionReturn(0);
2967 }
2968 
2969 #undef __FUNCT__
2970 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2971 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2972 {
2973   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2974   PetscInt       n      = A->rmap->n;
2975   PetscInt       cstart = A->cmap->rstart;
2976   PetscInt       *cmap  = mat->garray;
2977   PetscInt       *diagIdx, *offdiagIdx;
2978   Vec            diagV, offdiagV;
2979   PetscScalar    *a, *diagA, *offdiagA;
2980   PetscInt       r;
2981   PetscErrorCode ierr;
2982 
2983   PetscFunctionBegin;
2984   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2985   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2986   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2987   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2988   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2989   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2990   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2991   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2992   for (r = 0; r < n; ++r) {
2993     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2994       a[r]   = diagA[r];
2995       idx[r] = cstart + diagIdx[r];
2996     } else {
2997       a[r]   = offdiagA[r];
2998       idx[r] = cmap[offdiagIdx[r]];
2999     }
3000   }
3001   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3002   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3003   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3004   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3005   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3006   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3007   PetscFunctionReturn(0);
3008 }
3009 
3010 #undef __FUNCT__
3011 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3012 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3013 {
3014   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3015   PetscInt       n      = A->rmap->n;
3016   PetscInt       cstart = A->cmap->rstart;
3017   PetscInt       *cmap  = mat->garray;
3018   PetscInt       *diagIdx, *offdiagIdx;
3019   Vec            diagV, offdiagV;
3020   PetscScalar    *a, *diagA, *offdiagA;
3021   PetscInt       r;
3022   PetscErrorCode ierr;
3023 
3024   PetscFunctionBegin;
3025   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3026   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3027   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3028   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3029   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3030   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3031   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3032   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3033   for (r = 0; r < n; ++r) {
3034     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3035       a[r]   = diagA[r];
3036       idx[r] = cstart + diagIdx[r];
3037     } else {
3038       a[r]   = offdiagA[r];
3039       idx[r] = cmap[offdiagIdx[r]];
3040     }
3041   }
3042   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3043   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3044   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3045   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3046   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3047   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3048   PetscFunctionReturn(0);
3049 }
3050 
3051 #undef __FUNCT__
3052 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3053 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3054 {
3055   PetscErrorCode ierr;
3056   Mat            *dummy;
3057 
3058   PetscFunctionBegin;
3059   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3060   *newmat = *dummy;
3061   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3062   PetscFunctionReturn(0);
3063 }
3064 
3065 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3066 
3067 #undef __FUNCT__
3068 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3069 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3070 {
3071   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3072   PetscErrorCode ierr;
3073 
3074   PetscFunctionBegin;
3075   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3076   PetscFunctionReturn(0);
3077 }
3078 
3079 #undef __FUNCT__
3080 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3081 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3082 {
3083   PetscErrorCode ierr;
3084   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3085 
3086   PetscFunctionBegin;
3087   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3088   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3089   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3090   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3091   PetscFunctionReturn(0);
3092 }
3093 
3094 /* -------------------------------------------------------------------*/
3095 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3096        MatGetRow_MPIAIJ,
3097        MatRestoreRow_MPIAIJ,
3098        MatMult_MPIAIJ,
3099 /* 4*/ MatMultAdd_MPIAIJ,
3100        MatMultTranspose_MPIAIJ,
3101        MatMultTransposeAdd_MPIAIJ,
3102 #if defined(PETSC_HAVE_PBGL)
3103        MatSolve_MPIAIJ,
3104 #else
3105        0,
3106 #endif
3107        0,
3108        0,
3109 /*10*/ 0,
3110        0,
3111        0,
3112        MatSOR_MPIAIJ,
3113        MatTranspose_MPIAIJ,
3114 /*15*/ MatGetInfo_MPIAIJ,
3115        MatEqual_MPIAIJ,
3116        MatGetDiagonal_MPIAIJ,
3117        MatDiagonalScale_MPIAIJ,
3118        MatNorm_MPIAIJ,
3119 /*20*/ MatAssemblyBegin_MPIAIJ,
3120        MatAssemblyEnd_MPIAIJ,
3121        MatSetOption_MPIAIJ,
3122        MatZeroEntries_MPIAIJ,
3123 /*24*/ MatZeroRows_MPIAIJ,
3124        0,
3125 #if defined(PETSC_HAVE_PBGL)
3126        0,
3127 #else
3128        0,
3129 #endif
3130        0,
3131        0,
3132 /*29*/ MatSetUp_MPIAIJ,
3133 #if defined(PETSC_HAVE_PBGL)
3134        0,
3135 #else
3136        0,
3137 #endif
3138        0,
3139        0,
3140        0,
3141 /*34*/ MatDuplicate_MPIAIJ,
3142        0,
3143        0,
3144        0,
3145        0,
3146 /*39*/ MatAXPY_MPIAIJ,
3147        MatGetSubMatrices_MPIAIJ,
3148        MatIncreaseOverlap_MPIAIJ,
3149        MatGetValues_MPIAIJ,
3150        MatCopy_MPIAIJ,
3151 /*44*/ MatGetRowMax_MPIAIJ,
3152        MatScale_MPIAIJ,
3153        0,
3154        0,
3155        MatZeroRowsColumns_MPIAIJ,
3156 /*49*/ MatSetRandom_MPIAIJ,
3157        0,
3158        0,
3159        0,
3160        0,
3161 /*54*/ MatFDColoringCreate_MPIAIJ,
3162        0,
3163        MatSetUnfactored_MPIAIJ,
3164        MatPermute_MPIAIJ,
3165        0,
3166 /*59*/ MatGetSubMatrix_MPIAIJ,
3167        MatDestroy_MPIAIJ,
3168        MatView_MPIAIJ,
3169        0,
3170        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3171 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3172        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3173        0,
3174        0,
3175        0,
3176 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3177        MatGetRowMinAbs_MPIAIJ,
3178        0,
3179        MatSetColoring_MPIAIJ,
3180        0,
3181        MatSetValuesAdifor_MPIAIJ,
3182 /*75*/ MatFDColoringApply_AIJ,
3183        0,
3184        0,
3185        0,
3186        MatFindZeroDiagonals_MPIAIJ,
3187 /*80*/ 0,
3188        0,
3189        0,
3190 /*83*/ MatLoad_MPIAIJ,
3191        0,
3192        0,
3193        0,
3194        0,
3195        0,
3196 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3197        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3198        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3199        MatPtAP_MPIAIJ_MPIAIJ,
3200        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3201 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3202        0,
3203        0,
3204        0,
3205        0,
3206 /*99*/ 0,
3207        0,
3208        0,
3209        MatConjugate_MPIAIJ,
3210        0,
3211 /*104*/MatSetValuesRow_MPIAIJ,
3212        MatRealPart_MPIAIJ,
3213        MatImaginaryPart_MPIAIJ,
3214        0,
3215        0,
3216 /*109*/0,
3217        MatGetRedundantMatrix_MPIAIJ,
3218        MatGetRowMin_MPIAIJ,
3219        0,
3220        0,
3221 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3222        0,
3223        0,
3224        0,
3225        0,
3226 /*119*/0,
3227        0,
3228        0,
3229        0,
3230        MatGetMultiProcBlock_MPIAIJ,
3231 /*124*/MatFindNonzeroRows_MPIAIJ,
3232        MatGetColumnNorms_MPIAIJ,
3233        MatInvertBlockDiagonal_MPIAIJ,
3234        0,
3235        MatGetSubMatricesParallel_MPIAIJ,
3236 /*129*/0,
3237        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3238        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3239        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3240        0,
3241 /*134*/0,
3242        0,
3243        0,
3244        0,
3245        0
3246 };
3247 
3248 /* ----------------------------------------------------------------------------------------*/
3249 
3250 EXTERN_C_BEGIN
3251 #undef __FUNCT__
3252 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3253 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3254 {
3255   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3256   PetscErrorCode ierr;
3257 
3258   PetscFunctionBegin;
3259   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3260   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3261   PetscFunctionReturn(0);
3262 }
3263 EXTERN_C_END
3264 
3265 EXTERN_C_BEGIN
3266 #undef __FUNCT__
3267 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3268 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3269 {
3270   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3271   PetscErrorCode ierr;
3272 
3273   PetscFunctionBegin;
3274   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3275   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3276   PetscFunctionReturn(0);
3277 }
3278 EXTERN_C_END
3279 
3280 EXTERN_C_BEGIN
3281 #undef __FUNCT__
3282 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3283 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3284 {
3285   Mat_MPIAIJ     *b;
3286   PetscErrorCode ierr;
3287   PetscInt       i;
3288   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3289 
3290   PetscFunctionBegin;
3291   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3292   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3293   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3294   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3295   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3296   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3297 
3298   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3299   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3300   if (d_nnz) {
3301     for (i=0; i<B->rmap->n; i++) {
3302       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3303     }
3304   }
3305   if (o_nnz) {
3306     for (i=0; i<B->rmap->n; i++) {
3307       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3308     }
3309   }
3310   b = (Mat_MPIAIJ*)B->data;
3311 
3312   if (!B->preallocated) {
3313     /* Explicitly create 2 MATSEQAIJ matrices. */
3314     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3315     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3316     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3317     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3318     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3319     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3320     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3321     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3322     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3323     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3324   }
3325 
3326   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3327   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3328   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3329   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3330   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3331   B->preallocated = PETSC_TRUE;
3332   PetscFunctionReturn(0);
3333 }
3334 EXTERN_C_END
3335 
3336 #undef __FUNCT__
3337 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3338 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3339 {
3340   Mat            mat;
3341   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3342   PetscErrorCode ierr;
3343 
3344   PetscFunctionBegin;
3345   *newmat = 0;
3346   ierr    = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3347   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3348   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3349   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3350   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3351   a       = (Mat_MPIAIJ*)mat->data;
3352 
3353   mat->factortype   = matin->factortype;
3354   mat->rmap->bs     = matin->rmap->bs;
3355   mat->cmap->bs     = matin->cmap->bs;
3356   mat->assembled    = PETSC_TRUE;
3357   mat->insertmode   = NOT_SET_VALUES;
3358   mat->preallocated = PETSC_TRUE;
3359 
3360   a->size         = oldmat->size;
3361   a->rank         = oldmat->rank;
3362   a->donotstash   = oldmat->donotstash;
3363   a->roworiented  = oldmat->roworiented;
3364   a->rowindices   = 0;
3365   a->rowvalues    = 0;
3366   a->getrowactive = PETSC_FALSE;
3367 
3368   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3369   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3370 
3371   if (oldmat->colmap) {
3372 #if defined(PETSC_USE_CTABLE)
3373     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3374 #else
3375     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3376     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3377     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3378 #endif
3379   } else a->colmap = 0;
3380   if (oldmat->garray) {
3381     PetscInt len;
3382     len  = oldmat->B->cmap->n;
3383     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3384     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3385     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3386   } else a->garray = 0;
3387 
3388   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3389   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3390   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3391   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3392   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3393   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3394   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3395   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3396   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3397   *newmat = mat;
3398   PetscFunctionReturn(0);
3399 }
3400 
3401 
3402 
3403 #undef __FUNCT__
3404 #define __FUNCT__ "MatLoad_MPIAIJ"
3405 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3406 {
3407   PetscScalar    *vals,*svals;
3408   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3409   PetscErrorCode ierr;
3410   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3411   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3412   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3413   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3414   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3415   int            fd;
3416   PetscInt       bs = 1;
3417 
3418   PetscFunctionBegin;
3419   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3420   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3421   if (!rank) {
3422     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3423     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3424     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3425   }
3426 
3427   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3428   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3429   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3430 
3431   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3432 
3433   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3434   M    = header[1]; N = header[2];
3435   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3436   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3437   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3438 
3439   /* If global sizes are set, check if they are consistent with that given in the file */
3440   if (sizesset) {
3441     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3442   }
3443   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3444   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3445 
3446   /* determine ownership of all (block) rows */
3447   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3448   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3449   else m = newMat->rmap->n; /* Set by user */
3450 
3451   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3452   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3453 
3454   /* First process needs enough room for process with most rows */
3455   if (!rank) {
3456     mmax = rowners[1];
3457     for (i=2; i<=size; i++) {
3458       mmax = PetscMax(mmax, rowners[i]);
3459     }
3460   } else mmax = m;
3461 
3462   rowners[0] = 0;
3463   for (i=2; i<=size; i++) {
3464     rowners[i] += rowners[i-1];
3465   }
3466   rstart = rowners[rank];
3467   rend   = rowners[rank+1];
3468 
3469   /* distribute row lengths to all processors */
3470   ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3471   if (!rank) {
3472     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3473     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3474     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3475     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3476     for (j=0; j<m; j++) {
3477       procsnz[0] += ourlens[j];
3478     }
3479     for (i=1; i<size; i++) {
3480       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3481       /* calculate the number of nonzeros on each processor */
3482       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3483         procsnz[i] += rowlengths[j];
3484       }
3485       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3486     }
3487     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3488   } else {
3489     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3490   }
3491 
3492   if (!rank) {
3493     /* determine max buffer needed and allocate it */
3494     maxnz = 0;
3495     for (i=0; i<size; i++) {
3496       maxnz = PetscMax(maxnz,procsnz[i]);
3497     }
3498     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3499 
3500     /* read in my part of the matrix column indices  */
3501     nz   = procsnz[0];
3502     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3503     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3504 
3505     /* read in every one elses and ship off */
3506     for (i=1; i<size; i++) {
3507       nz   = procsnz[i];
3508       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3509       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3510     }
3511     ierr = PetscFree(cols);CHKERRQ(ierr);
3512   } else {
3513     /* determine buffer space needed for message */
3514     nz = 0;
3515     for (i=0; i<m; i++) {
3516       nz += ourlens[i];
3517     }
3518     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3519 
3520     /* receive message of column indices*/
3521     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3522   }
3523 
3524   /* determine column ownership if matrix is not square */
3525   if (N != M) {
3526     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3527     else n = newMat->cmap->n;
3528     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3529     cstart = cend - n;
3530   } else {
3531     cstart = rstart;
3532     cend   = rend;
3533     n      = cend - cstart;
3534   }
3535 
3536   /* loop over local rows, determining number of off diagonal entries */
3537   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3538   jj   = 0;
3539   for (i=0; i<m; i++) {
3540     for (j=0; j<ourlens[i]; j++) {
3541       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3542       jj++;
3543     }
3544   }
3545 
3546   for (i=0; i<m; i++) {
3547     ourlens[i] -= offlens[i];
3548   }
3549   if (!sizesset) {
3550     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3551   }
3552 
3553   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3554 
3555   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3556 
3557   for (i=0; i<m; i++) {
3558     ourlens[i] += offlens[i];
3559   }
3560 
3561   if (!rank) {
3562     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3563 
3564     /* read in my part of the matrix numerical values  */
3565     nz   = procsnz[0];
3566     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3567 
3568     /* insert into matrix */
3569     jj      = rstart;
3570     smycols = mycols;
3571     svals   = vals;
3572     for (i=0; i<m; i++) {
3573       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3574       smycols += ourlens[i];
3575       svals   += ourlens[i];
3576       jj++;
3577     }
3578 
3579     /* read in other processors and ship out */
3580     for (i=1; i<size; i++) {
3581       nz   = procsnz[i];
3582       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3583       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3584     }
3585     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3586   } else {
3587     /* receive numeric values */
3588     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3589 
3590     /* receive message of values*/
3591     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3592 
3593     /* insert into matrix */
3594     jj      = rstart;
3595     smycols = mycols;
3596     svals   = vals;
3597     for (i=0; i<m; i++) {
3598       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3599       smycols += ourlens[i];
3600       svals   += ourlens[i];
3601       jj++;
3602     }
3603   }
3604   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3605   ierr = PetscFree(vals);CHKERRQ(ierr);
3606   ierr = PetscFree(mycols);CHKERRQ(ierr);
3607   ierr = PetscFree(rowners);CHKERRQ(ierr);
3608   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3609   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3610   PetscFunctionReturn(0);
3611 }
3612 
3613 #undef __FUNCT__
3614 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3615 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3616 {
3617   PetscErrorCode ierr;
3618   IS             iscol_local;
3619   PetscInt       csize;
3620 
3621   PetscFunctionBegin;
3622   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3623   if (call == MAT_REUSE_MATRIX) {
3624     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3625     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3626   } else {
3627     PetscInt cbs;
3628     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3629     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3630     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3631   }
3632   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3633   if (call == MAT_INITIAL_MATRIX) {
3634     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3635     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3636   }
3637   PetscFunctionReturn(0);
3638 }
3639 
3640 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3641 #undef __FUNCT__
3642 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3643 /*
3644     Not great since it makes two copies of the submatrix, first an SeqAIJ
3645   in local and then by concatenating the local matrices the end result.
3646   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3647 
3648   Note: This requires a sequential iscol with all indices.
3649 */
3650 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3651 {
3652   PetscErrorCode ierr;
3653   PetscMPIInt    rank,size;
3654   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3655   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3656   PetscBool      allcolumns, colflag;
3657   Mat            M,Mreuse;
3658   MatScalar      *vwork,*aa;
3659   MPI_Comm       comm = ((PetscObject)mat)->comm;
3660   Mat_SeqAIJ     *aij;
3661 
3662 
3663   PetscFunctionBegin;
3664   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3665   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3666 
3667   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3668   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3669   if (colflag && ncol == mat->cmap->N) {
3670     allcolumns = PETSC_TRUE;
3671   } else {
3672     allcolumns = PETSC_FALSE;
3673   }
3674   if (call ==  MAT_REUSE_MATRIX) {
3675     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3676     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3677     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3678   } else {
3679     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3680   }
3681 
3682   /*
3683       m - number of local rows
3684       n - number of columns (same on all processors)
3685       rstart - first row in new global matrix generated
3686   */
3687   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3688   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3689   if (call == MAT_INITIAL_MATRIX) {
3690     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3691     ii  = aij->i;
3692     jj  = aij->j;
3693 
3694     /*
3695         Determine the number of non-zeros in the diagonal and off-diagonal
3696         portions of the matrix in order to do correct preallocation
3697     */
3698 
3699     /* first get start and end of "diagonal" columns */
3700     if (csize == PETSC_DECIDE) {
3701       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3702       if (mglobal == n) { /* square matrix */
3703         nlocal = m;
3704       } else {
3705         nlocal = n/size + ((n % size) > rank);
3706       }
3707     } else {
3708       nlocal = csize;
3709     }
3710     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3711     rstart = rend - nlocal;
3712     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3713 
3714     /* next, compute all the lengths */
3715     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3716     olens = dlens + m;
3717     for (i=0; i<m; i++) {
3718       jend = ii[i+1] - ii[i];
3719       olen = 0;
3720       dlen = 0;
3721       for (j=0; j<jend; j++) {
3722         if (*jj < rstart || *jj >= rend) olen++;
3723         else dlen++;
3724         jj++;
3725       }
3726       olens[i] = olen;
3727       dlens[i] = dlen;
3728     }
3729     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3730     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3731     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3732     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3733     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3734     ierr = PetscFree(dlens);CHKERRQ(ierr);
3735   } else {
3736     PetscInt ml,nl;
3737 
3738     M    = *newmat;
3739     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3740     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3741     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3742     /*
3743          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3744        rather than the slower MatSetValues().
3745     */
3746     M->was_assembled = PETSC_TRUE;
3747     M->assembled     = PETSC_FALSE;
3748   }
3749   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3750   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3751   ii   = aij->i;
3752   jj   = aij->j;
3753   aa   = aij->a;
3754   for (i=0; i<m; i++) {
3755     row   = rstart + i;
3756     nz    = ii[i+1] - ii[i];
3757     cwork = jj;     jj += nz;
3758     vwork = aa;     aa += nz;
3759     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3760   }
3761 
3762   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3763   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3764   *newmat = M;
3765 
3766   /* save submatrix used in processor for next request */
3767   if (call ==  MAT_INITIAL_MATRIX) {
3768     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3769     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3770   }
3771   PetscFunctionReturn(0);
3772 }
3773 
3774 EXTERN_C_BEGIN
3775 #undef __FUNCT__
3776 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3777 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3778 {
3779   PetscInt       m,cstart, cend,j,nnz,i,d;
3780   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3781   const PetscInt *JJ;
3782   PetscScalar    *values;
3783   PetscErrorCode ierr;
3784 
3785   PetscFunctionBegin;
3786   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3787 
3788   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3789   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3790   m      = B->rmap->n;
3791   cstart = B->cmap->rstart;
3792   cend   = B->cmap->rend;
3793   rstart = B->rmap->rstart;
3794 
3795   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3796 
3797 #if defined(PETSC_USE_DEBUGGING)
3798   for (i=0; i<m; i++) {
3799     nnz = Ii[i+1]- Ii[i];
3800     JJ  = J + Ii[i];
3801     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3802     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3803     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3804   }
3805 #endif
3806 
3807   for (i=0; i<m; i++) {
3808     nnz     = Ii[i+1]- Ii[i];
3809     JJ      = J + Ii[i];
3810     nnz_max = PetscMax(nnz_max,nnz);
3811     d       = 0;
3812     for (j=0; j<nnz; j++) {
3813       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3814     }
3815     d_nnz[i] = d;
3816     o_nnz[i] = nnz - d;
3817   }
3818   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3819   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3820 
3821   if (v) values = (PetscScalar*)v;
3822   else {
3823     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3824     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3825   }
3826 
3827   for (i=0; i<m; i++) {
3828     ii   = i + rstart;
3829     nnz  = Ii[i+1]- Ii[i];
3830     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3831   }
3832   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3833   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3834 
3835   if (!v) {
3836     ierr = PetscFree(values);CHKERRQ(ierr);
3837   }
3838   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3839   PetscFunctionReturn(0);
3840 }
3841 EXTERN_C_END
3842 
3843 #undef __FUNCT__
3844 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3845 /*@
3846    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3847    (the default parallel PETSc format).
3848 
3849    Collective on MPI_Comm
3850 
3851    Input Parameters:
3852 +  B - the matrix
3853 .  i - the indices into j for the start of each local row (starts with zero)
3854 .  j - the column indices for each local row (starts with zero)
3855 -  v - optional values in the matrix
3856 
3857    Level: developer
3858 
3859    Notes:
3860        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3861      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3862      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3863 
3864        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3865 
3866        The format which is used for the sparse matrix input, is equivalent to a
3867     row-major ordering.. i.e for the following matrix, the input data expected is
3868     as shown:
3869 
3870         1 0 0
3871         2 0 3     P0
3872        -------
3873         4 5 6     P1
3874 
3875      Process0 [P0]: rows_owned=[0,1]
3876         i =  {0,1,3}  [size = nrow+1  = 2+1]
3877         j =  {0,0,2}  [size = nz = 6]
3878         v =  {1,2,3}  [size = nz = 6]
3879 
3880      Process1 [P1]: rows_owned=[2]
3881         i =  {0,3}    [size = nrow+1  = 1+1]
3882         j =  {0,1,2}  [size = nz = 6]
3883         v =  {4,5,6}  [size = nz = 6]
3884 
3885 .keywords: matrix, aij, compressed row, sparse, parallel
3886 
3887 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3888           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3889 @*/
3890 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3891 {
3892   PetscErrorCode ierr;
3893 
3894   PetscFunctionBegin;
3895   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3896   PetscFunctionReturn(0);
3897 }
3898 
3899 #undef __FUNCT__
3900 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3901 /*@C
3902    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3903    (the default parallel PETSc format).  For good matrix assembly performance
3904    the user should preallocate the matrix storage by setting the parameters
3905    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3906    performance can be increased by more than a factor of 50.
3907 
3908    Collective on MPI_Comm
3909 
3910    Input Parameters:
3911 +  A - the matrix
3912 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3913            (same value is used for all local rows)
3914 .  d_nnz - array containing the number of nonzeros in the various rows of the
3915            DIAGONAL portion of the local submatrix (possibly different for each row)
3916            or NULL, if d_nz is used to specify the nonzero structure.
3917            The size of this array is equal to the number of local rows, i.e 'm'.
3918            For matrices that will be factored, you must leave room for (and set)
3919            the diagonal entry even if it is zero.
3920 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3921            submatrix (same value is used for all local rows).
3922 -  o_nnz - array containing the number of nonzeros in the various rows of the
3923            OFF-DIAGONAL portion of the local submatrix (possibly different for
3924            each row) or NULL, if o_nz is used to specify the nonzero
3925            structure. The size of this array is equal to the number
3926            of local rows, i.e 'm'.
3927 
3928    If the *_nnz parameter is given then the *_nz parameter is ignored
3929 
3930    The AIJ format (also called the Yale sparse matrix format or
3931    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3932    storage.  The stored row and column indices begin with zero.
3933    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3934 
3935    The parallel matrix is partitioned such that the first m0 rows belong to
3936    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3937    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3938 
3939    The DIAGONAL portion of the local submatrix of a processor can be defined
3940    as the submatrix which is obtained by extraction the part corresponding to
3941    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3942    first row that belongs to the processor, r2 is the last row belonging to
3943    the this processor, and c1-c2 is range of indices of the local part of a
3944    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3945    common case of a square matrix, the row and column ranges are the same and
3946    the DIAGONAL part is also square. The remaining portion of the local
3947    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3948 
3949    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3950 
3951    You can call MatGetInfo() to get information on how effective the preallocation was;
3952    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3953    You can also run with the option -info and look for messages with the string
3954    malloc in them to see if additional memory allocation was needed.
3955 
3956    Example usage:
3957 
3958    Consider the following 8x8 matrix with 34 non-zero values, that is
3959    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3960    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3961    as follows:
3962 
3963 .vb
3964             1  2  0  |  0  3  0  |  0  4
3965     Proc0   0  5  6  |  7  0  0  |  8  0
3966             9  0 10  | 11  0  0  | 12  0
3967     -------------------------------------
3968            13  0 14  | 15 16 17  |  0  0
3969     Proc1   0 18  0  | 19 20 21  |  0  0
3970             0  0  0  | 22 23  0  | 24  0
3971     -------------------------------------
3972     Proc2  25 26 27  |  0  0 28  | 29  0
3973            30  0  0  | 31 32 33  |  0 34
3974 .ve
3975 
3976    This can be represented as a collection of submatrices as:
3977 
3978 .vb
3979       A B C
3980       D E F
3981       G H I
3982 .ve
3983 
3984    Where the submatrices A,B,C are owned by proc0, D,E,F are
3985    owned by proc1, G,H,I are owned by proc2.
3986 
3987    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3988    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3989    The 'M','N' parameters are 8,8, and have the same values on all procs.
3990 
3991    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3992    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3993    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3994    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3995    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3996    matrix, ans [DF] as another SeqAIJ matrix.
3997 
3998    When d_nz, o_nz parameters are specified, d_nz storage elements are
3999    allocated for every row of the local diagonal submatrix, and o_nz
4000    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4001    One way to choose d_nz and o_nz is to use the max nonzerors per local
4002    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4003    In this case, the values of d_nz,o_nz are:
4004 .vb
4005      proc0 : dnz = 2, o_nz = 2
4006      proc1 : dnz = 3, o_nz = 2
4007      proc2 : dnz = 1, o_nz = 4
4008 .ve
4009    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4010    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4011    for proc3. i.e we are using 12+15+10=37 storage locations to store
4012    34 values.
4013 
4014    When d_nnz, o_nnz parameters are specified, the storage is specified
4015    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4016    In the above case the values for d_nnz,o_nnz are:
4017 .vb
4018      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4019      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4020      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4021 .ve
4022    Here the space allocated is sum of all the above values i.e 34, and
4023    hence pre-allocation is perfect.
4024 
4025    Level: intermediate
4026 
4027 .keywords: matrix, aij, compressed row, sparse, parallel
4028 
4029 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4030           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4031 @*/
4032 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4033 {
4034   PetscErrorCode ierr;
4035 
4036   PetscFunctionBegin;
4037   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4038   PetscValidType(B,1);
4039   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4040   PetscFunctionReturn(0);
4041 }
4042 
4043 #undef __FUNCT__
4044 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4045 /*@
4046      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4047          CSR format the local rows.
4048 
4049    Collective on MPI_Comm
4050 
4051    Input Parameters:
4052 +  comm - MPI communicator
4053 .  m - number of local rows (Cannot be PETSC_DECIDE)
4054 .  n - This value should be the same as the local size used in creating the
4055        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4056        calculated if N is given) For square matrices n is almost always m.
4057 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4058 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4059 .   i - row indices
4060 .   j - column indices
4061 -   a - matrix values
4062 
4063    Output Parameter:
4064 .   mat - the matrix
4065 
4066    Level: intermediate
4067 
4068    Notes:
4069        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4070      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4071      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4072 
4073        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4074 
4075        The format which is used for the sparse matrix input, is equivalent to a
4076     row-major ordering.. i.e for the following matrix, the input data expected is
4077     as shown:
4078 
4079         1 0 0
4080         2 0 3     P0
4081        -------
4082         4 5 6     P1
4083 
4084      Process0 [P0]: rows_owned=[0,1]
4085         i =  {0,1,3}  [size = nrow+1  = 2+1]
4086         j =  {0,0,2}  [size = nz = 6]
4087         v =  {1,2,3}  [size = nz = 6]
4088 
4089      Process1 [P1]: rows_owned=[2]
4090         i =  {0,3}    [size = nrow+1  = 1+1]
4091         j =  {0,1,2}  [size = nz = 6]
4092         v =  {4,5,6}  [size = nz = 6]
4093 
4094 .keywords: matrix, aij, compressed row, sparse, parallel
4095 
4096 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4097           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4098 @*/
4099 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4100 {
4101   PetscErrorCode ierr;
4102 
4103   PetscFunctionBegin;
4104   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4105   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4106   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4107   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4108   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4109   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4110   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4111   PetscFunctionReturn(0);
4112 }
4113 
4114 #undef __FUNCT__
4115 #define __FUNCT__ "MatCreateAIJ"
4116 /*@C
4117    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4118    (the default parallel PETSc format).  For good matrix assembly performance
4119    the user should preallocate the matrix storage by setting the parameters
4120    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4121    performance can be increased by more than a factor of 50.
4122 
4123    Collective on MPI_Comm
4124 
4125    Input Parameters:
4126 +  comm - MPI communicator
4127 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4128            This value should be the same as the local size used in creating the
4129            y vector for the matrix-vector product y = Ax.
4130 .  n - This value should be the same as the local size used in creating the
4131        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4132        calculated if N is given) For square matrices n is almost always m.
4133 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4134 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4135 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4136            (same value is used for all local rows)
4137 .  d_nnz - array containing the number of nonzeros in the various rows of the
4138            DIAGONAL portion of the local submatrix (possibly different for each row)
4139            or NULL, if d_nz is used to specify the nonzero structure.
4140            The size of this array is equal to the number of local rows, i.e 'm'.
4141 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4142            submatrix (same value is used for all local rows).
4143 -  o_nnz - array containing the number of nonzeros in the various rows of the
4144            OFF-DIAGONAL portion of the local submatrix (possibly different for
4145            each row) or NULL, if o_nz is used to specify the nonzero
4146            structure. The size of this array is equal to the number
4147            of local rows, i.e 'm'.
4148 
4149    Output Parameter:
4150 .  A - the matrix
4151 
4152    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4153    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4154    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4155 
4156    Notes:
4157    If the *_nnz parameter is given then the *_nz parameter is ignored
4158 
4159    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4160    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4161    storage requirements for this matrix.
4162 
4163    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4164    processor than it must be used on all processors that share the object for
4165    that argument.
4166 
4167    The user MUST specify either the local or global matrix dimensions
4168    (possibly both).
4169 
4170    The parallel matrix is partitioned across processors such that the
4171    first m0 rows belong to process 0, the next m1 rows belong to
4172    process 1, the next m2 rows belong to process 2 etc.. where
4173    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4174    values corresponding to [m x N] submatrix.
4175 
4176    The columns are logically partitioned with the n0 columns belonging
4177    to 0th partition, the next n1 columns belonging to the next
4178    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4179 
4180    The DIAGONAL portion of the local submatrix on any given processor
4181    is the submatrix corresponding to the rows and columns m,n
4182    corresponding to the given processor. i.e diagonal matrix on
4183    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4184    etc. The remaining portion of the local submatrix [m x (N-n)]
4185    constitute the OFF-DIAGONAL portion. The example below better
4186    illustrates this concept.
4187 
4188    For a square global matrix we define each processor's diagonal portion
4189    to be its local rows and the corresponding columns (a square submatrix);
4190    each processor's off-diagonal portion encompasses the remainder of the
4191    local matrix (a rectangular submatrix).
4192 
4193    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4194 
4195    When calling this routine with a single process communicator, a matrix of
4196    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4197    type of communicator, use the construction mechanism:
4198      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4199 
4200    By default, this format uses inodes (identical nodes) when possible.
4201    We search for consecutive rows with the same nonzero structure, thereby
4202    reusing matrix information to achieve increased efficiency.
4203 
4204    Options Database Keys:
4205 +  -mat_no_inode  - Do not use inodes
4206 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4207 -  -mat_aij_oneindex - Internally use indexing starting at 1
4208         rather than 0.  Note that when calling MatSetValues(),
4209         the user still MUST index entries starting at 0!
4210 
4211 
4212    Example usage:
4213 
4214    Consider the following 8x8 matrix with 34 non-zero values, that is
4215    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4216    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4217    as follows:
4218 
4219 .vb
4220             1  2  0  |  0  3  0  |  0  4
4221     Proc0   0  5  6  |  7  0  0  |  8  0
4222             9  0 10  | 11  0  0  | 12  0
4223     -------------------------------------
4224            13  0 14  | 15 16 17  |  0  0
4225     Proc1   0 18  0  | 19 20 21  |  0  0
4226             0  0  0  | 22 23  0  | 24  0
4227     -------------------------------------
4228     Proc2  25 26 27  |  0  0 28  | 29  0
4229            30  0  0  | 31 32 33  |  0 34
4230 .ve
4231 
4232    This can be represented as a collection of submatrices as:
4233 
4234 .vb
4235       A B C
4236       D E F
4237       G H I
4238 .ve
4239 
4240    Where the submatrices A,B,C are owned by proc0, D,E,F are
4241    owned by proc1, G,H,I are owned by proc2.
4242 
4243    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4244    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4245    The 'M','N' parameters are 8,8, and have the same values on all procs.
4246 
4247    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4248    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4249    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4250    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4251    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4252    matrix, ans [DF] as another SeqAIJ matrix.
4253 
4254    When d_nz, o_nz parameters are specified, d_nz storage elements are
4255    allocated for every row of the local diagonal submatrix, and o_nz
4256    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4257    One way to choose d_nz and o_nz is to use the max nonzerors per local
4258    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4259    In this case, the values of d_nz,o_nz are:
4260 .vb
4261      proc0 : dnz = 2, o_nz = 2
4262      proc1 : dnz = 3, o_nz = 2
4263      proc2 : dnz = 1, o_nz = 4
4264 .ve
4265    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4266    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4267    for proc3. i.e we are using 12+15+10=37 storage locations to store
4268    34 values.
4269 
4270    When d_nnz, o_nnz parameters are specified, the storage is specified
4271    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4272    In the above case the values for d_nnz,o_nnz are:
4273 .vb
4274      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4275      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4276      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4277 .ve
4278    Here the space allocated is sum of all the above values i.e 34, and
4279    hence pre-allocation is perfect.
4280 
4281    Level: intermediate
4282 
4283 .keywords: matrix, aij, compressed row, sparse, parallel
4284 
4285 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4286           MPIAIJ, MatCreateMPIAIJWithArrays()
4287 @*/
4288 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4289 {
4290   PetscErrorCode ierr;
4291   PetscMPIInt    size;
4292 
4293   PetscFunctionBegin;
4294   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4295   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4296   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4297   if (size > 1) {
4298     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4299     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4300   } else {
4301     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4302     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4303   }
4304   PetscFunctionReturn(0);
4305 }
4306 
4307 #undef __FUNCT__
4308 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4309 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4310 {
4311   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4312 
4313   PetscFunctionBegin;
4314   *Ad     = a->A;
4315   *Ao     = a->B;
4316   *colmap = a->garray;
4317   PetscFunctionReturn(0);
4318 }
4319 
4320 #undef __FUNCT__
4321 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4322 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4323 {
4324   PetscErrorCode ierr;
4325   PetscInt       i;
4326   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4327 
4328   PetscFunctionBegin;
4329   if (coloring->ctype == IS_COLORING_GLOBAL) {
4330     ISColoringValue *allcolors,*colors;
4331     ISColoring      ocoloring;
4332 
4333     /* set coloring for diagonal portion */
4334     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4335 
4336     /* set coloring for off-diagonal portion */
4337     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4338     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4339     for (i=0; i<a->B->cmap->n; i++) {
4340       colors[i] = allcolors[a->garray[i]];
4341     }
4342     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4343     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4344     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4345     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4346   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4347     ISColoringValue *colors;
4348     PetscInt        *larray;
4349     ISColoring      ocoloring;
4350 
4351     /* set coloring for diagonal portion */
4352     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4353     for (i=0; i<a->A->cmap->n; i++) {
4354       larray[i] = i + A->cmap->rstart;
4355     }
4356     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4357     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4358     for (i=0; i<a->A->cmap->n; i++) {
4359       colors[i] = coloring->colors[larray[i]];
4360     }
4361     ierr = PetscFree(larray);CHKERRQ(ierr);
4362     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4363     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4364     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4365 
4366     /* set coloring for off-diagonal portion */
4367     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4368     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4369     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4370     for (i=0; i<a->B->cmap->n; i++) {
4371       colors[i] = coloring->colors[larray[i]];
4372     }
4373     ierr = PetscFree(larray);CHKERRQ(ierr);
4374     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4375     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4376     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4377   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4378   PetscFunctionReturn(0);
4379 }
4380 
4381 #undef __FUNCT__
4382 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4383 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4384 {
4385   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4386   PetscErrorCode ierr;
4387 
4388   PetscFunctionBegin;
4389   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4390   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4391   PetscFunctionReturn(0);
4392 }
4393 
4394 #undef __FUNCT__
4395 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4396 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4397 {
4398   PetscErrorCode ierr;
4399   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4400   PetscInt       *indx;
4401 
4402   PetscFunctionBegin;
4403   /* This routine will ONLY return MPIAIJ type matrix */
4404   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4405   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4406   if (n == PETSC_DECIDE) {
4407     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4408   }
4409   /* Check sum(n) = N */
4410   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4411   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4412 
4413   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4414   rstart -= m;
4415 
4416   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4417   for (i=0; i<m; i++) {
4418     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4419     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4420     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4421   }
4422 
4423   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4424   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4425   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4426   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4427   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4428   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4429   PetscFunctionReturn(0);
4430 }
4431 
4432 #undef __FUNCT__
4433 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4434 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4435 {
4436   PetscErrorCode ierr;
4437   PetscInt       m,N,i,rstart,nnz,Ii;
4438   PetscInt       *indx;
4439   PetscScalar    *values;
4440 
4441   PetscFunctionBegin;
4442   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4443   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4444   for (i=0; i<m; i++) {
4445     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4446     Ii   = i + rstart;
4447     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4448     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4449   }
4450   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4451   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4452   PetscFunctionReturn(0);
4453 }
4454 
4455 #undef __FUNCT__
4456 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4457 /*@
4458       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4459                  matrices from each processor
4460 
4461     Collective on MPI_Comm
4462 
4463    Input Parameters:
4464 +    comm - the communicators the parallel matrix will live on
4465 .    inmat - the input sequential matrices
4466 .    n - number of local columns (or PETSC_DECIDE)
4467 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4468 
4469    Output Parameter:
4470 .    outmat - the parallel matrix generated
4471 
4472     Level: advanced
4473 
4474    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4475 
4476 @*/
4477 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4478 {
4479   PetscErrorCode ierr;
4480 
4481   PetscFunctionBegin;
4482   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4483   if (scall == MAT_INITIAL_MATRIX) {
4484     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4485   }
4486   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4487   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4488   PetscFunctionReturn(0);
4489 }
4490 
4491 #undef __FUNCT__
4492 #define __FUNCT__ "MatFileSplit"
4493 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4494 {
4495   PetscErrorCode    ierr;
4496   PetscMPIInt       rank;
4497   PetscInt          m,N,i,rstart,nnz;
4498   size_t            len;
4499   const PetscInt    *indx;
4500   PetscViewer       out;
4501   char              *name;
4502   Mat               B;
4503   const PetscScalar *values;
4504 
4505   PetscFunctionBegin;
4506   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4507   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4508   /* Should this be the type of the diagonal block of A? */
4509   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4510   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4511   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4512   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4513   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4514   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4515   for (i=0; i<m; i++) {
4516     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4517     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4518     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4519   }
4520   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4521   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4522 
4523   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4524   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4525   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4526   sprintf(name,"%s.%d",outfile,rank);
4527   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4528   ierr = PetscFree(name);CHKERRQ(ierr);
4529   ierr = MatView(B,out);CHKERRQ(ierr);
4530   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4531   ierr = MatDestroy(&B);CHKERRQ(ierr);
4532   PetscFunctionReturn(0);
4533 }
4534 
4535 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4536 #undef __FUNCT__
4537 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4538 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4539 {
4540   PetscErrorCode      ierr;
4541   Mat_Merge_SeqsToMPI *merge;
4542   PetscContainer      container;
4543 
4544   PetscFunctionBegin;
4545   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4546   if (container) {
4547     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4548     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4549     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4550     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4551     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4552     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4553     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4554     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4555     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4556     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4557     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4558     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4559     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4560     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4561     ierr = PetscFree(merge);CHKERRQ(ierr);
4562     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4563   }
4564   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4565   PetscFunctionReturn(0);
4566 }
4567 
4568 #include <../src/mat/utils/freespace.h>
4569 #include <petscbt.h>
4570 
4571 #undef __FUNCT__
4572 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4573 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4574 {
4575   PetscErrorCode      ierr;
4576   MPI_Comm            comm=((PetscObject)mpimat)->comm;
4577   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4578   PetscMPIInt         size,rank,taga,*len_s;
4579   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4580   PetscInt            proc,m;
4581   PetscInt            **buf_ri,**buf_rj;
4582   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4583   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4584   MPI_Request         *s_waits,*r_waits;
4585   MPI_Status          *status;
4586   MatScalar           *aa=a->a;
4587   MatScalar           **abuf_r,*ba_i;
4588   Mat_Merge_SeqsToMPI *merge;
4589   PetscContainer      container;
4590 
4591   PetscFunctionBegin;
4592   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4593 
4594   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4595   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4596 
4597   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4598   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4599 
4600   bi     = merge->bi;
4601   bj     = merge->bj;
4602   buf_ri = merge->buf_ri;
4603   buf_rj = merge->buf_rj;
4604 
4605   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4606   owners = merge->rowmap->range;
4607   len_s  = merge->len_s;
4608 
4609   /* send and recv matrix values */
4610   /*-----------------------------*/
4611   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4612   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4613 
4614   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4615   for (proc=0,k=0; proc<size; proc++) {
4616     if (!len_s[proc]) continue;
4617     i    = owners[proc];
4618     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4619     k++;
4620   }
4621 
4622   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4623   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4624   ierr = PetscFree(status);CHKERRQ(ierr);
4625 
4626   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4627   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4628 
4629   /* insert mat values of mpimat */
4630   /*----------------------------*/
4631   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4632   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4633 
4634   for (k=0; k<merge->nrecv; k++) {
4635     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4636     nrows       = *(buf_ri_k[k]);
4637     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4638     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4639   }
4640 
4641   /* set values of ba */
4642   m = merge->rowmap->n;
4643   for (i=0; i<m; i++) {
4644     arow = owners[rank] + i;
4645     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4646     bnzi = bi[i+1] - bi[i];
4647     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4648 
4649     /* add local non-zero vals of this proc's seqmat into ba */
4650     anzi   = ai[arow+1] - ai[arow];
4651     aj     = a->j + ai[arow];
4652     aa     = a->a + ai[arow];
4653     nextaj = 0;
4654     for (j=0; nextaj<anzi; j++) {
4655       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4656         ba_i[j] += aa[nextaj++];
4657       }
4658     }
4659 
4660     /* add received vals into ba */
4661     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4662       /* i-th row */
4663       if (i == *nextrow[k]) {
4664         anzi   = *(nextai[k]+1) - *nextai[k];
4665         aj     = buf_rj[k] + *(nextai[k]);
4666         aa     = abuf_r[k] + *(nextai[k]);
4667         nextaj = 0;
4668         for (j=0; nextaj<anzi; j++) {
4669           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4670             ba_i[j] += aa[nextaj++];
4671           }
4672         }
4673         nextrow[k]++; nextai[k]++;
4674       }
4675     }
4676     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4677   }
4678   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4679   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4680 
4681   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4682   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4683   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4684   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4685   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4686   PetscFunctionReturn(0);
4687 }
4688 
4689 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4690 
4691 #undef __FUNCT__
4692 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4693 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4694 {
4695   PetscErrorCode      ierr;
4696   Mat                 B_mpi;
4697   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4698   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4699   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4700   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4701   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4702   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4703   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4704   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4705   MPI_Status          *status;
4706   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4707   PetscBT             lnkbt;
4708   Mat_Merge_SeqsToMPI *merge;
4709   PetscContainer      container;
4710 
4711   PetscFunctionBegin;
4712   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4713 
4714   /* make sure it is a PETSc comm */
4715   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4716   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4717   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4718 
4719   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4720   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4721 
4722   /* determine row ownership */
4723   /*---------------------------------------------------------*/
4724   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4725   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4726   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4727   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4728   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4729   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4730   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4731 
4732   m      = merge->rowmap->n;
4733   owners = merge->rowmap->range;
4734 
4735   /* determine the number of messages to send, their lengths */
4736   /*---------------------------------------------------------*/
4737   len_s = merge->len_s;
4738 
4739   len          = 0; /* length of buf_si[] */
4740   merge->nsend = 0;
4741   for (proc=0; proc<size; proc++) {
4742     len_si[proc] = 0;
4743     if (proc == rank) {
4744       len_s[proc] = 0;
4745     } else {
4746       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4747       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4748     }
4749     if (len_s[proc]) {
4750       merge->nsend++;
4751       nrows = 0;
4752       for (i=owners[proc]; i<owners[proc+1]; i++) {
4753         if (ai[i+1] > ai[i]) nrows++;
4754       }
4755       len_si[proc] = 2*(nrows+1);
4756       len         += len_si[proc];
4757     }
4758   }
4759 
4760   /* determine the number and length of messages to receive for ij-structure */
4761   /*-------------------------------------------------------------------------*/
4762   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4763   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4764 
4765   /* post the Irecv of j-structure */
4766   /*-------------------------------*/
4767   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4768   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4769 
4770   /* post the Isend of j-structure */
4771   /*--------------------------------*/
4772   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4773 
4774   for (proc=0, k=0; proc<size; proc++) {
4775     if (!len_s[proc]) continue;
4776     i    = owners[proc];
4777     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4778     k++;
4779   }
4780 
4781   /* receives and sends of j-structure are complete */
4782   /*------------------------------------------------*/
4783   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4784   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4785 
4786   /* send and recv i-structure */
4787   /*---------------------------*/
4788   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4789   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4790 
4791   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4792   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4793   for (proc=0,k=0; proc<size; proc++) {
4794     if (!len_s[proc]) continue;
4795     /* form outgoing message for i-structure:
4796          buf_si[0]:                 nrows to be sent
4797                [1:nrows]:           row index (global)
4798                [nrows+1:2*nrows+1]: i-structure index
4799     */
4800     /*-------------------------------------------*/
4801     nrows       = len_si[proc]/2 - 1;
4802     buf_si_i    = buf_si + nrows+1;
4803     buf_si[0]   = nrows;
4804     buf_si_i[0] = 0;
4805     nrows       = 0;
4806     for (i=owners[proc]; i<owners[proc+1]; i++) {
4807       anzi = ai[i+1] - ai[i];
4808       if (anzi) {
4809         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4810         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4811         nrows++;
4812       }
4813     }
4814     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4815     k++;
4816     buf_si += len_si[proc];
4817   }
4818 
4819   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4820   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4821 
4822   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4823   for (i=0; i<merge->nrecv; i++) {
4824     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4825   }
4826 
4827   ierr = PetscFree(len_si);CHKERRQ(ierr);
4828   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4829   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4830   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4831   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4832   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4833   ierr = PetscFree(status);CHKERRQ(ierr);
4834 
4835   /* compute a local seq matrix in each processor */
4836   /*----------------------------------------------*/
4837   /* allocate bi array and free space for accumulating nonzero column info */
4838   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4839   bi[0] = 0;
4840 
4841   /* create and initialize a linked list */
4842   nlnk = N+1;
4843   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4844 
4845   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4846   len  = ai[owners[rank+1]] - ai[owners[rank]];
4847   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4848 
4849   current_space = free_space;
4850 
4851   /* determine symbolic info for each local row */
4852   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4853 
4854   for (k=0; k<merge->nrecv; k++) {
4855     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4856     nrows       = *buf_ri_k[k];
4857     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4858     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4859   }
4860 
4861   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4862   len  = 0;
4863   for (i=0; i<m; i++) {
4864     bnzi = 0;
4865     /* add local non-zero cols of this proc's seqmat into lnk */
4866     arow  = owners[rank] + i;
4867     anzi  = ai[arow+1] - ai[arow];
4868     aj    = a->j + ai[arow];
4869     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4870     bnzi += nlnk;
4871     /* add received col data into lnk */
4872     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4873       if (i == *nextrow[k]) { /* i-th row */
4874         anzi  = *(nextai[k]+1) - *nextai[k];
4875         aj    = buf_rj[k] + *nextai[k];
4876         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4877         bnzi += nlnk;
4878         nextrow[k]++; nextai[k]++;
4879       }
4880     }
4881     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4882 
4883     /* if free space is not available, make more free space */
4884     if (current_space->local_remaining<bnzi) {
4885       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4886       nspacedouble++;
4887     }
4888     /* copy data into free space, then initialize lnk */
4889     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4890     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4891 
4892     current_space->array           += bnzi;
4893     current_space->local_used      += bnzi;
4894     current_space->local_remaining -= bnzi;
4895 
4896     bi[i+1] = bi[i] + bnzi;
4897   }
4898 
4899   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4900 
4901   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4902   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4903   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4904 
4905   /* create symbolic parallel matrix B_mpi */
4906   /*---------------------------------------*/
4907   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4908   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4909   if (n==PETSC_DECIDE) {
4910     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4911   } else {
4912     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4913   }
4914   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4915   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4916   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4917   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4918   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4919 
4920   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4921   B_mpi->assembled    = PETSC_FALSE;
4922   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4923   merge->bi           = bi;
4924   merge->bj           = bj;
4925   merge->buf_ri       = buf_ri;
4926   merge->buf_rj       = buf_rj;
4927   merge->coi          = NULL;
4928   merge->coj          = NULL;
4929   merge->owners_co    = NULL;
4930 
4931   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4932 
4933   /* attach the supporting struct to B_mpi for reuse */
4934   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4935   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4936   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4937   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4938   *mpimat = B_mpi;
4939 
4940   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4941   PetscFunctionReturn(0);
4942 }
4943 
4944 #undef __FUNCT__
4945 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4946 /*@C
4947       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4948                  matrices from each processor
4949 
4950     Collective on MPI_Comm
4951 
4952    Input Parameters:
4953 +    comm - the communicators the parallel matrix will live on
4954 .    seqmat - the input sequential matrices
4955 .    m - number of local rows (or PETSC_DECIDE)
4956 .    n - number of local columns (or PETSC_DECIDE)
4957 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4958 
4959    Output Parameter:
4960 .    mpimat - the parallel matrix generated
4961 
4962     Level: advanced
4963 
4964    Notes:
4965      The dimensions of the sequential matrix in each processor MUST be the same.
4966      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4967      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4968 @*/
4969 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4970 {
4971   PetscErrorCode ierr;
4972   PetscMPIInt    size;
4973 
4974   PetscFunctionBegin;
4975   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4976   if (size == 1) {
4977     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4978     if (scall == MAT_INITIAL_MATRIX) {
4979       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4980     } else {
4981       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4982     }
4983     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4984     PetscFunctionReturn(0);
4985   }
4986   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4987   if (scall == MAT_INITIAL_MATRIX) {
4988     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4989   }
4990   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4991   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4992   PetscFunctionReturn(0);
4993 }
4994 
4995 #undef __FUNCT__
4996 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4997 /*@
4998      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4999           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5000           with MatGetSize()
5001 
5002     Not Collective
5003 
5004    Input Parameters:
5005 +    A - the matrix
5006 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5007 
5008    Output Parameter:
5009 .    A_loc - the local sequential matrix generated
5010 
5011     Level: developer
5012 
5013 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
5014 
5015 @*/
5016 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5017 {
5018   PetscErrorCode ierr;
5019   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5020   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5021   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5022   MatScalar      *aa=a->a,*ba=b->a,*cam;
5023   PetscScalar    *ca;
5024   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5025   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5026   PetscBool      match;
5027 
5028   PetscFunctionBegin;
5029   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5030   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5031   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5032   if (scall == MAT_INITIAL_MATRIX) {
5033     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5034     ci[0] = 0;
5035     for (i=0; i<am; i++) {
5036       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5037     }
5038     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5039     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5040     k    = 0;
5041     for (i=0; i<am; i++) {
5042       ncols_o = bi[i+1] - bi[i];
5043       ncols_d = ai[i+1] - ai[i];
5044       /* off-diagonal portion of A */
5045       for (jo=0; jo<ncols_o; jo++) {
5046         col = cmap[*bj];
5047         if (col >= cstart) break;
5048         cj[k]   = col; bj++;
5049         ca[k++] = *ba++;
5050       }
5051       /* diagonal portion of A */
5052       for (j=0; j<ncols_d; j++) {
5053         cj[k]   = cstart + *aj++;
5054         ca[k++] = *aa++;
5055       }
5056       /* off-diagonal portion of A */
5057       for (j=jo; j<ncols_o; j++) {
5058         cj[k]   = cmap[*bj++];
5059         ca[k++] = *ba++;
5060       }
5061     }
5062     /* put together the new matrix */
5063     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5064     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5065     /* Since these are PETSc arrays, change flags to free them as necessary. */
5066     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5067     mat->free_a  = PETSC_TRUE;
5068     mat->free_ij = PETSC_TRUE;
5069     mat->nonew   = 0;
5070   } else if (scall == MAT_REUSE_MATRIX) {
5071     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5072     ci = mat->i; cj = mat->j; cam = mat->a;
5073     for (i=0; i<am; i++) {
5074       /* off-diagonal portion of A */
5075       ncols_o = bi[i+1] - bi[i];
5076       for (jo=0; jo<ncols_o; jo++) {
5077         col = cmap[*bj];
5078         if (col >= cstart) break;
5079         *cam++ = *ba++; bj++;
5080       }
5081       /* diagonal portion of A */
5082       ncols_d = ai[i+1] - ai[i];
5083       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5084       /* off-diagonal portion of A */
5085       for (j=jo; j<ncols_o; j++) {
5086         *cam++ = *ba++; bj++;
5087       }
5088     }
5089   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5090   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5091   PetscFunctionReturn(0);
5092 }
5093 
5094 #undef __FUNCT__
5095 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5096 /*@C
5097      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5098 
5099     Not Collective
5100 
5101    Input Parameters:
5102 +    A - the matrix
5103 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5104 -    row, col - index sets of rows and columns to extract (or NULL)
5105 
5106    Output Parameter:
5107 .    A_loc - the local sequential matrix generated
5108 
5109     Level: developer
5110 
5111 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5112 
5113 @*/
5114 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5115 {
5116   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5117   PetscErrorCode ierr;
5118   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5119   IS             isrowa,iscola;
5120   Mat            *aloc;
5121   PetscBool      match;
5122 
5123   PetscFunctionBegin;
5124   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5125   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5126   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5127   if (!row) {
5128     start = A->rmap->rstart; end = A->rmap->rend;
5129     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5130   } else {
5131     isrowa = *row;
5132   }
5133   if (!col) {
5134     start = A->cmap->rstart;
5135     cmap  = a->garray;
5136     nzA   = a->A->cmap->n;
5137     nzB   = a->B->cmap->n;
5138     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5139     ncols = 0;
5140     for (i=0; i<nzB; i++) {
5141       if (cmap[i] < start) idx[ncols++] = cmap[i];
5142       else break;
5143     }
5144     imark = i;
5145     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5146     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5147     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5148   } else {
5149     iscola = *col;
5150   }
5151   if (scall != MAT_INITIAL_MATRIX) {
5152     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5153     aloc[0] = *A_loc;
5154   }
5155   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5156   *A_loc = aloc[0];
5157   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5158   if (!row) {
5159     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5160   }
5161   if (!col) {
5162     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5163   }
5164   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5165   PetscFunctionReturn(0);
5166 }
5167 
5168 #undef __FUNCT__
5169 #define __FUNCT__ "MatGetBrowsOfAcols"
5170 /*@C
5171     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5172 
5173     Collective on Mat
5174 
5175    Input Parameters:
5176 +    A,B - the matrices in mpiaij format
5177 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5178 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5179 
5180    Output Parameter:
5181 +    rowb, colb - index sets of rows and columns of B to extract
5182 -    B_seq - the sequential matrix generated
5183 
5184     Level: developer
5185 
5186 @*/
5187 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5188 {
5189   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5190   PetscErrorCode ierr;
5191   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5192   IS             isrowb,iscolb;
5193   Mat            *bseq=NULL;
5194 
5195   PetscFunctionBegin;
5196   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5197     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5198   }
5199   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5200 
5201   if (scall == MAT_INITIAL_MATRIX) {
5202     start = A->cmap->rstart;
5203     cmap  = a->garray;
5204     nzA   = a->A->cmap->n;
5205     nzB   = a->B->cmap->n;
5206     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5207     ncols = 0;
5208     for (i=0; i<nzB; i++) {  /* row < local row index */
5209       if (cmap[i] < start) idx[ncols++] = cmap[i];
5210       else break;
5211     }
5212     imark = i;
5213     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5214     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5215     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5216     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5217   } else {
5218     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5219     isrowb  = *rowb; iscolb = *colb;
5220     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5221     bseq[0] = *B_seq;
5222   }
5223   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5224   *B_seq = bseq[0];
5225   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5226   if (!rowb) {
5227     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5228   } else {
5229     *rowb = isrowb;
5230   }
5231   if (!colb) {
5232     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5233   } else {
5234     *colb = iscolb;
5235   }
5236   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5237   PetscFunctionReturn(0);
5238 }
5239 
5240 #undef __FUNCT__
5241 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5242 /*
5243     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5244     of the OFF-DIAGONAL portion of local A
5245 
5246     Collective on Mat
5247 
5248    Input Parameters:
5249 +    A,B - the matrices in mpiaij format
5250 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5251 
5252    Output Parameter:
5253 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5254 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5255 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5256 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5257 
5258     Level: developer
5259 
5260 */
5261 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5262 {
5263   VecScatter_MPI_General *gen_to,*gen_from;
5264   PetscErrorCode         ierr;
5265   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5266   Mat_SeqAIJ             *b_oth;
5267   VecScatter             ctx =a->Mvctx;
5268   MPI_Comm               comm=((PetscObject)ctx)->comm;
5269   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5270   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5271   PetscScalar            *rvalues,*svalues;
5272   MatScalar              *b_otha,*bufa,*bufA;
5273   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5274   MPI_Request            *rwaits = NULL,*swaits = NULL;
5275   MPI_Status             *sstatus,rstatus;
5276   PetscMPIInt            jj;
5277   PetscInt               *cols,sbs,rbs;
5278   PetscScalar            *vals;
5279 
5280   PetscFunctionBegin;
5281   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5282     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5283   }
5284   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5285   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5286 
5287   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5288   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5289   rvalues  = gen_from->values; /* holds the length of receiving row */
5290   svalues  = gen_to->values;   /* holds the length of sending row */
5291   nrecvs   = gen_from->n;
5292   nsends   = gen_to->n;
5293 
5294   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5295   srow    = gen_to->indices;    /* local row index to be sent */
5296   sstarts = gen_to->starts;
5297   sprocs  = gen_to->procs;
5298   sstatus = gen_to->sstatus;
5299   sbs     = gen_to->bs;
5300   rstarts = gen_from->starts;
5301   rprocs  = gen_from->procs;
5302   rbs     = gen_from->bs;
5303 
5304   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5305   if (scall == MAT_INITIAL_MATRIX) {
5306     /* i-array */
5307     /*---------*/
5308     /*  post receives */
5309     for (i=0; i<nrecvs; i++) {
5310       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5311       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5312       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5313     }
5314 
5315     /* pack the outgoing message */
5316     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5317 
5318     sstartsj[0] = 0;
5319     rstartsj[0] = 0;
5320     len         = 0; /* total length of j or a array to be sent */
5321     k           = 0;
5322     for (i=0; i<nsends; i++) {
5323       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5324       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5325       for (j=0; j<nrows; j++) {
5326         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5327         for (l=0; l<sbs; l++) {
5328           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5329 
5330           rowlen[j*sbs+l] = ncols;
5331 
5332           len += ncols;
5333           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5334         }
5335         k++;
5336       }
5337       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5338 
5339       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5340     }
5341     /* recvs and sends of i-array are completed */
5342     i = nrecvs;
5343     while (i--) {
5344       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5345     }
5346     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5347 
5348     /* allocate buffers for sending j and a arrays */
5349     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5350     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5351 
5352     /* create i-array of B_oth */
5353     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5354 
5355     b_othi[0] = 0;
5356     len       = 0; /* total length of j or a array to be received */
5357     k         = 0;
5358     for (i=0; i<nrecvs; i++) {
5359       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5360       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5361       for (j=0; j<nrows; j++) {
5362         b_othi[k+1] = b_othi[k] + rowlen[j];
5363         len        += rowlen[j]; k++;
5364       }
5365       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5366     }
5367 
5368     /* allocate space for j and a arrrays of B_oth */
5369     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5370     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5371 
5372     /* j-array */
5373     /*---------*/
5374     /*  post receives of j-array */
5375     for (i=0; i<nrecvs; i++) {
5376       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5377       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5378     }
5379 
5380     /* pack the outgoing message j-array */
5381     k = 0;
5382     for (i=0; i<nsends; i++) {
5383       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5384       bufJ  = bufj+sstartsj[i];
5385       for (j=0; j<nrows; j++) {
5386         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5387         for (ll=0; ll<sbs; ll++) {
5388           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5389           for (l=0; l<ncols; l++) {
5390             *bufJ++ = cols[l];
5391           }
5392           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5393         }
5394       }
5395       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5396     }
5397 
5398     /* recvs and sends of j-array are completed */
5399     i = nrecvs;
5400     while (i--) {
5401       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5402     }
5403     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5404   } else if (scall == MAT_REUSE_MATRIX) {
5405     sstartsj = *startsj_s;
5406     rstartsj = *startsj_r;
5407     bufa     = *bufa_ptr;
5408     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5409     b_otha   = b_oth->a;
5410   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5411 
5412   /* a-array */
5413   /*---------*/
5414   /*  post receives of a-array */
5415   for (i=0; i<nrecvs; i++) {
5416     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5417     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5418   }
5419 
5420   /* pack the outgoing message a-array */
5421   k = 0;
5422   for (i=0; i<nsends; i++) {
5423     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5424     bufA  = bufa+sstartsj[i];
5425     for (j=0; j<nrows; j++) {
5426       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5427       for (ll=0; ll<sbs; ll++) {
5428         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5429         for (l=0; l<ncols; l++) {
5430           *bufA++ = vals[l];
5431         }
5432         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5433       }
5434     }
5435     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5436   }
5437   /* recvs and sends of a-array are completed */
5438   i = nrecvs;
5439   while (i--) {
5440     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5441   }
5442   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5443   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5444 
5445   if (scall == MAT_INITIAL_MATRIX) {
5446     /* put together the new matrix */
5447     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5448 
5449     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5450     /* Since these are PETSc arrays, change flags to free them as necessary. */
5451     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5452     b_oth->free_a  = PETSC_TRUE;
5453     b_oth->free_ij = PETSC_TRUE;
5454     b_oth->nonew   = 0;
5455 
5456     ierr = PetscFree(bufj);CHKERRQ(ierr);
5457     if (!startsj_s || !bufa_ptr) {
5458       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5459       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5460     } else {
5461       *startsj_s = sstartsj;
5462       *startsj_r = rstartsj;
5463       *bufa_ptr  = bufa;
5464     }
5465   }
5466   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5467   PetscFunctionReturn(0);
5468 }
5469 
5470 #undef __FUNCT__
5471 #define __FUNCT__ "MatGetCommunicationStructs"
5472 /*@C
5473   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5474 
5475   Not Collective
5476 
5477   Input Parameters:
5478 . A - The matrix in mpiaij format
5479 
5480   Output Parameter:
5481 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5482 . colmap - A map from global column index to local index into lvec
5483 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5484 
5485   Level: developer
5486 
5487 @*/
5488 #if defined(PETSC_USE_CTABLE)
5489 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5490 #else
5491 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5492 #endif
5493 {
5494   Mat_MPIAIJ *a;
5495 
5496   PetscFunctionBegin;
5497   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5498   PetscValidPointer(lvec, 2);
5499   PetscValidPointer(colmap, 3);
5500   PetscValidPointer(multScatter, 4);
5501   a = (Mat_MPIAIJ*) A->data;
5502   if (lvec) *lvec = a->lvec;
5503   if (colmap) *colmap = a->colmap;
5504   if (multScatter) *multScatter = a->Mvctx;
5505   PetscFunctionReturn(0);
5506 }
5507 
5508 EXTERN_C_BEGIN
5509 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5510 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5511 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5512 EXTERN_C_END
5513 
5514 #undef __FUNCT__
5515 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5516 /*
5517     Computes (B'*A')' since computing B*A directly is untenable
5518 
5519                n                       p                          p
5520         (              )       (              )         (                  )
5521       m (      A       )  *  n (       B      )   =   m (         C        )
5522         (              )       (              )         (                  )
5523 
5524 */
5525 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5526 {
5527   PetscErrorCode ierr;
5528   Mat            At,Bt,Ct;
5529 
5530   PetscFunctionBegin;
5531   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5532   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5533   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5534   ierr = MatDestroy(&At);CHKERRQ(ierr);
5535   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5536   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5537   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5538   PetscFunctionReturn(0);
5539 }
5540 
5541 #undef __FUNCT__
5542 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5543 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5544 {
5545   PetscErrorCode ierr;
5546   PetscInt       m=A->rmap->n,n=B->cmap->n;
5547   Mat            Cmat;
5548 
5549   PetscFunctionBegin;
5550   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5551   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5552   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5553   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5554   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5555   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5556   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5557   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5558 
5559   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5560 
5561   *C = Cmat;
5562   PetscFunctionReturn(0);
5563 }
5564 
5565 /* ----------------------------------------------------------------*/
5566 #undef __FUNCT__
5567 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5568 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5569 {
5570   PetscErrorCode ierr;
5571 
5572   PetscFunctionBegin;
5573   if (scall == MAT_INITIAL_MATRIX) {
5574     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5575   }
5576   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5577   PetscFunctionReturn(0);
5578 }
5579 
5580 EXTERN_C_BEGIN
5581 #if defined(PETSC_HAVE_MUMPS)
5582 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5583 #endif
5584 #if defined(PETSC_HAVE_PASTIX)
5585 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5586 #endif
5587 #if defined(PETSC_HAVE_SUPERLU_DIST)
5588 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5589 #endif
5590 #if defined(PETSC_HAVE_CLIQUE)
5591 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5592 #endif
5593 EXTERN_C_END
5594 
5595 /*MC
5596    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5597 
5598    Options Database Keys:
5599 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5600 
5601   Level: beginner
5602 
5603 .seealso: MatCreateAIJ()
5604 M*/
5605 
5606 EXTERN_C_BEGIN
5607 #undef __FUNCT__
5608 #define __FUNCT__ "MatCreate_MPIAIJ"
5609 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5610 {
5611   Mat_MPIAIJ     *b;
5612   PetscErrorCode ierr;
5613   PetscMPIInt    size;
5614 
5615   PetscFunctionBegin;
5616   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5617 
5618   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5619   B->data       = (void*)b;
5620   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5621   B->assembled  = PETSC_FALSE;
5622   B->insertmode = NOT_SET_VALUES;
5623   b->size       = size;
5624 
5625   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5626 
5627   /* build cache for off array entries formed */
5628   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5629 
5630   b->donotstash  = PETSC_FALSE;
5631   b->colmap      = 0;
5632   b->garray      = 0;
5633   b->roworiented = PETSC_TRUE;
5634 
5635   /* stuff used for matrix vector multiply */
5636   b->lvec  = NULL;
5637   b->Mvctx = NULL;
5638 
5639   /* stuff for MatGetRow() */
5640   b->rowindices   = 0;
5641   b->rowvalues    = 0;
5642   b->getrowactive = PETSC_FALSE;
5643 
5644   /* flexible pointer used in CUSP/CUSPARSE classes */
5645   b->spptr = NULL;
5646 
5647 #if defined(PETSC_HAVE_MUMPS)
5648   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5649                                            "MatGetFactor_aij_mumps",
5650                                            MatGetFactor_aij_mumps);CHKERRQ(ierr);
5651 #endif
5652 #if defined(PETSC_HAVE_PASTIX)
5653   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5654                                            "MatGetFactor_mpiaij_pastix",
5655                                            MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5656 #endif
5657 #if defined(PETSC_HAVE_SUPERLU_DIST)
5658   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5659                                            "MatGetFactor_mpiaij_superlu_dist",
5660                                            MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5661 #endif
5662 #if defined(PETSC_HAVE_CLIQUE)
5663   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5664                                            "MatGetFactor_aij_clique",
5665                                            MatGetFactor_aij_clique);CHKERRQ(ierr);
5666 #endif
5667   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5668                                            "MatStoreValues_MPIAIJ",
5669                                            MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5670   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5671                                            "MatRetrieveValues_MPIAIJ",
5672                                            MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5673   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5674                                            "MatGetDiagonalBlock_MPIAIJ",
5675                                            MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5676   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5677                                            "MatIsTranspose_MPIAIJ",
5678                                            MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5679   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5680                                            "MatMPIAIJSetPreallocation_MPIAIJ",
5681                                            MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5682   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5683                                            "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5684                                            MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5685   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5686                                            "MatDiagonalScaleLocal_MPIAIJ",
5687                                            MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5688   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5689                                            "MatConvert_MPIAIJ_MPIAIJPERM",
5690                                            MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5691   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5692                                            "MatConvert_MPIAIJ_MPIAIJCRL",
5693                                            MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5694   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5695                                            "MatConvert_MPIAIJ_MPISBAIJ",
5696                                            MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5697   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5698                                            "MatMatMult_MPIDense_MPIAIJ",
5699                                            MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5700   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5701                                            "MatMatMultSymbolic_MPIDense_MPIAIJ",
5702                                            MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5703   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5704                                            "MatMatMultNumeric_MPIDense_MPIAIJ",
5705                                            MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5706   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5707   PetscFunctionReturn(0);
5708 }
5709 EXTERN_C_END
5710 
5711 #undef __FUNCT__
5712 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5713 /*@
5714      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5715          and "off-diagonal" part of the matrix in CSR format.
5716 
5717    Collective on MPI_Comm
5718 
5719    Input Parameters:
5720 +  comm - MPI communicator
5721 .  m - number of local rows (Cannot be PETSC_DECIDE)
5722 .  n - This value should be the same as the local size used in creating the
5723        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5724        calculated if N is given) For square matrices n is almost always m.
5725 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5726 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5727 .   i - row indices for "diagonal" portion of matrix
5728 .   j - column indices
5729 .   a - matrix values
5730 .   oi - row indices for "off-diagonal" portion of matrix
5731 .   oj - column indices
5732 -   oa - matrix values
5733 
5734    Output Parameter:
5735 .   mat - the matrix
5736 
5737    Level: advanced
5738 
5739    Notes:
5740        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5741        must free the arrays once the matrix has been destroyed and not before.
5742 
5743        The i and j indices are 0 based
5744 
5745        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5746 
5747        This sets local rows and cannot be used to set off-processor values.
5748 
5749        You cannot later use MatSetValues() to change values in this matrix.
5750 
5751 .keywords: matrix, aij, compressed row, sparse, parallel
5752 
5753 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5754           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5755 @*/
5756 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5757 {
5758   PetscErrorCode ierr;
5759   Mat_MPIAIJ     *maij;
5760 
5761   PetscFunctionBegin;
5762   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5763   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5764   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5765   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5766   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5767   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5768   maij = (Mat_MPIAIJ*) (*mat)->data;
5769 
5770   maij->donotstash     = PETSC_TRUE;
5771   (*mat)->preallocated = PETSC_TRUE;
5772 
5773   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5774   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5775 
5776   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5777   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5778 
5779   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5780   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5781   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5782   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5783 
5784   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5785   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5786   PetscFunctionReturn(0);
5787 }
5788 
5789 /*
5790     Special version for direct calls from Fortran
5791 */
5792 #include <petsc-private/fortranimpl.h>
5793 
5794 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5795 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5796 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5797 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5798 #endif
5799 
5800 /* Change these macros so can be used in void function */
5801 #undef CHKERRQ
5802 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5803 #undef SETERRQ2
5804 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5805 #undef SETERRQ3
5806 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5807 #undef SETERRQ
5808 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5809 
5810 EXTERN_C_BEGIN
5811 #undef __FUNCT__
5812 #define __FUNCT__ "matsetvaluesmpiaij_"
5813 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5814 {
5815   Mat            mat  = *mmat;
5816   PetscInt       m    = *mm, n = *mn;
5817   InsertMode     addv = *maddv;
5818   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5819   PetscScalar    value;
5820   PetscErrorCode ierr;
5821 
5822   MatCheckPreallocated(mat,1);
5823   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5824 
5825 #if defined(PETSC_USE_DEBUG)
5826   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5827 #endif
5828   {
5829     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5830     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5831     PetscBool roworiented = aij->roworiented;
5832 
5833     /* Some Variables required in the macro */
5834     Mat        A                 = aij->A;
5835     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5836     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5837     MatScalar  *aa               = a->a;
5838     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5839     Mat        B                 = aij->B;
5840     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5841     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5842     MatScalar  *ba               = b->a;
5843 
5844     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5845     PetscInt  nonew = a->nonew;
5846     MatScalar *ap1,*ap2;
5847 
5848     PetscFunctionBegin;
5849     for (i=0; i<m; i++) {
5850       if (im[i] < 0) continue;
5851 #if defined(PETSC_USE_DEBUG)
5852       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5853 #endif
5854       if (im[i] >= rstart && im[i] < rend) {
5855         row      = im[i] - rstart;
5856         lastcol1 = -1;
5857         rp1      = aj + ai[row];
5858         ap1      = aa + ai[row];
5859         rmax1    = aimax[row];
5860         nrow1    = ailen[row];
5861         low1     = 0;
5862         high1    = nrow1;
5863         lastcol2 = -1;
5864         rp2      = bj + bi[row];
5865         ap2      = ba + bi[row];
5866         rmax2    = bimax[row];
5867         nrow2    = bilen[row];
5868         low2     = 0;
5869         high2    = nrow2;
5870 
5871         for (j=0; j<n; j++) {
5872           if (roworiented) value = v[i*n+j];
5873           else value = v[i+j*m];
5874           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5875           if (in[j] >= cstart && in[j] < cend) {
5876             col = in[j] - cstart;
5877             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5878           } else if (in[j] < 0) continue;
5879 #if defined(PETSC_USE_DEBUG)
5880           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5881 #endif
5882           else {
5883             if (mat->was_assembled) {
5884               if (!aij->colmap) {
5885                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5886               }
5887 #if defined(PETSC_USE_CTABLE)
5888               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5889               col--;
5890 #else
5891               col = aij->colmap[in[j]] - 1;
5892 #endif
5893               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5894                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5895                 col  =  in[j];
5896                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5897                 B     = aij->B;
5898                 b     = (Mat_SeqAIJ*)B->data;
5899                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5900                 rp2   = bj + bi[row];
5901                 ap2   = ba + bi[row];
5902                 rmax2 = bimax[row];
5903                 nrow2 = bilen[row];
5904                 low2  = 0;
5905                 high2 = nrow2;
5906                 bm    = aij->B->rmap->n;
5907                 ba    = b->a;
5908               }
5909             } else col = in[j];
5910             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5911           }
5912         }
5913       } else if (!aij->donotstash) {
5914         if (roworiented) {
5915           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5916         } else {
5917           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5918         }
5919       }
5920     }
5921   }
5922   PetscFunctionReturnVoid();
5923 }
5924 EXTERN_C_END
5925 
5926