xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ce94432eddcd14845bc7e8083b7f8ea723b9bf7d)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 
6 /*MC
7    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
8 
9    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
10    and MATMPIAIJ otherwise.  As a result, for single process communicators,
11   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
12   for communicators controlling multiple processes.  It is recommended that you call both of
13   the above preallocation routines for simplicity.
14 
15    Options Database Keys:
16 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
17 
18   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
19    enough of them exist.
20 
21   Level: beginner
22 
23 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
24 M*/
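/*
   A minimal usage sketch (not part of this source; comm, A, m, n, M, N, d_nz, and o_nz are
   placeholder names) of the recommended pattern of calling both preallocation routines so the
   same code works with one process or many:

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
*/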
25 
26 /*MC
27    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
28 
29    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
30    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
31    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
32   for communicators controlling multiple processes.  It is recommended that you call both of
33   the above preallocation routines for simplicity.
34 
35    Options Database Keys:
36 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
37 
38   Level: beginner
39 
40 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
41 M*/
42 
43 #undef __FUNCT__
44 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
45 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
46 {
47   PetscErrorCode  ierr;
48   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
49   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
50   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
51   const PetscInt  *ia,*ib;
52   const MatScalar *aa,*bb;
53   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
54   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
55 
56   PetscFunctionBegin;
57   *keptrows = 0;
58   ia        = a->i;
59   ib        = b->i;
60   for (i=0; i<m; i++) {
61     na = ia[i+1] - ia[i];
62     nb = ib[i+1] - ib[i];
63     if (!na && !nb) {
64       cnt++;
65       goto ok1;
66     }
67     aa = a->a + ia[i];
68     for (j=0; j<na; j++) {
69       if (aa[j] != 0.0) goto ok1;
70     }
71     bb = b->a + ib[i];
72     for (j=0; j <nb; j++) {
73       if (bb[j] != 0.0) goto ok1;
74     }
75     cnt++;
76 ok1:;
77   }
78   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
79   if (!n0rows) PetscFunctionReturn(0);
80   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
81   cnt  = 0;
82   for (i=0; i<m; i++) {
83     na = ia[i+1] - ia[i];
84     nb = ib[i+1] - ib[i];
85     if (!na && !nb) continue;
86     aa = a->a + ia[i];
87     for (j=0; j<na;j++) {
88       if (aa[j] != 0.0) {
89         rows[cnt++] = rstart + i;
90         goto ok2;
91       }
92     }
93     bb = b->a + ib[i];
94     for (j=0; j<nb; j++) {
95       if (bb[j] != 0.0) {
96         rows[cnt++] = rstart + i;
97         goto ok2;
98       }
99     }
100 ok2:;
101   }
102   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
108 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
109 {
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
111   PetscErrorCode ierr;
112   PetscInt       i,rstart,nrows,*rows;
113 
114   PetscFunctionBegin;
115   *zrows = NULL;
116   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
117   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
118   for (i=0; i<nrows; i++) rows[i] += rstart;
119   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
120   PetscFunctionReturn(0);
121 }
122 
123 #undef __FUNCT__
124 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
125 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
126 {
127   PetscErrorCode ierr;
128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
129   PetscInt       i,n,*garray = aij->garray;
130   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
131   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
132   PetscReal      *work;
133 
134   PetscFunctionBegin;
135   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
136   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
137   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 */
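/*
   Hypothetical call sketch (this is an internal routine; gmat, m, and A are placeholder names):
   gmat is a sequential matrix held on process 0 and each process requests m local rows of the result:

     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
*/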
181 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
182 {
183   PetscMPIInt    rank,size;
184   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
185   PetscErrorCode ierr;
186   Mat            mat;
187   Mat_SeqAIJ     *gmata;
188   PetscMPIInt    tag;
189   MPI_Status     status;
190   PetscBool      aij;
191   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
192 
193   PetscFunctionBegin;
194   CHKMEMQ;
195   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
196   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
197   if (!rank) {
198     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
199     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
200   }
201   if (reuse == MAT_INITIAL_MATRIX) {
202     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
203     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
204     if (!rank) {
205       bses[0] = gmat->rmap->bs;
206       bses[1] = gmat->cmap->bs;
207     }
208     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
209     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
210     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
211     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
212     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
213     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
214 
215     rowners[0] = 0;
216     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine the number of diagonal and off-diagonal entries in each row */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
231       jj   = 0;
232       for (i=0; i<m; i++) {
233         for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236           jj++;
237         }
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the number of diagonal and off-diagonal entries in each row */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj   = 0;
266       for (i=0; i<m; i++) {
267         for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270           jj++;
271         }
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298 
299     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
300 
301     *inmat = mat;
302   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
303     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
304     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
305     mat  = *inmat;
306     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
307     if (!rank) {
308       /* send numerical values to other processes */
309       gmata  = (Mat_SeqAIJ*) gmat->data;
310       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
311       gmataa = gmata->a;
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
315       }
316       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
317     } else {
318       /* receive numerical values from process 0*/
319       nz   = Ad->nz + Ao->nz;
320       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
321       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
322     }
323     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
324     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
325     ad = Ad->a;
326     ao = Ao->a;
327     if (mat->rmap->n) {
328       i  = 0;
329       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     for (i=1; i<mat->rmap->n; i++) {
333       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
334       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
335     }
336     i--;
337     if (mat->rmap->n) {
338       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
339     }
340     if (rank) {
341       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
342     }
343   }
344   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
345   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
346   CHKMEMQ;
347   PetscFunctionReturn(0);
348 }
349 
350 /*
351   Local utility routine that creates a mapping from the global column
352 number to the local number in the off-diagonal part of the local
353 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
354 a slightly higher hash table cost; without it, it is not scalable (each processor
355 has an order N integer array) but access is fast.
356 */
357 #undef __FUNCT__
358 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
359 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
360 {
361   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
362   PetscErrorCode ierr;
363   PetscInt       n = aij->B->cmap->n,i;
364 
365   PetscFunctionBegin;
366   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
367 #if defined(PETSC_USE_CTABLE)
368   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
369   for (i=0; i<n; i++) {
370     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
371   }
372 #else
373   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
374   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
375   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
377 #endif
378   PetscFunctionReturn(0);
379 }
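/*
   Sketch of how the colmap built above is consulted later in this file (e.g. in MatSetValues_MPIAIJ()
   and MatGetValues_MPIAIJ()) to translate a global column index gcol into a local index lcol in the
   off-diagonal block; gcol and lcol are placeholder names, and a value of -1 after the decrement
   means the column is not present locally:

#if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
#else
     lcol = aij->colmap[gcol] - 1;
#endif
*/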
380 
381 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
382 { \
383     if (col <= lastcol1)  low1 = 0;     \
384     else                 high1 = nrow1; \
385     lastcol1 = col;\
386     while (high1-low1 > 5) { \
387       t = (low1+high1)/2; \
388       if (rp1[t] > col) high1 = t; \
389       else              low1  = t; \
390     } \
391       for (_i=low1; _i<high1; _i++) { \
392         if (rp1[_i] > col) break; \
393         if (rp1[_i] == col) { \
394           if (addv == ADD_VALUES) ap1[_i] += value;   \
395           else                    ap1[_i] = value; \
396           goto a_noinsert; \
397         } \
398       }  \
399       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
400       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
401       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
402       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
403       N = nrow1++ - 1; a->nz++; high1++; \
404       /* shift up all the later entries in this row */ \
405       for (ii=N; ii>=_i; ii--) { \
406         rp1[ii+1] = rp1[ii]; \
407         ap1[ii+1] = ap1[ii]; \
408       } \
409       rp1[_i] = col;  \
410       ap1[_i] = value;  \
411       a_noinsert: ; \
412       ailen[row] = nrow1; \
413 }
414 
415 
416 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
417   { \
418     if (col <= lastcol2) low2 = 0;                        \
419     else high2 = nrow2;                                   \
420     lastcol2 = col;                                       \
421     while (high2-low2 > 5) {                              \
422       t = (low2+high2)/2;                                 \
423       if (rp2[t] > col) high2 = t;                        \
424       else             low2  = t;                         \
425     }                                                     \
426     for (_i=low2; _i<high2; _i++) {                       \
427       if (rp2[_i] > col) break;                           \
428       if (rp2[_i] == col) {                               \
429         if (addv == ADD_VALUES) ap2[_i] += value;         \
430         else                    ap2[_i] = value;          \
431         goto b_noinsert;                                  \
432       }                                                   \
433     }                                                     \
434     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
435     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
436     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
437     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
438     N = nrow2++ - 1; b->nz++; high2++;                    \
439     /* shift up all the later entries in this row */      \
440     for (ii=N; ii>=_i; ii--) {                            \
441       rp2[ii+1] = rp2[ii];                                \
442       ap2[ii+1] = ap2[ii];                                \
443     }                                                     \
444     rp2[_i] = col;                                        \
445     ap2[_i] = value;                                      \
446     b_noinsert: ;                                         \
447     bilen[row] = nrow2;                                   \
448   }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
452 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
453 {
454   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
455   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
456   PetscErrorCode ierr;
457   PetscInt       l,*garray = mat->garray,diag;
458 
459   PetscFunctionBegin;
460   /* code only works for square matrices A */
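  /* v is assumed to hold the entire local row in global column order: the off-diagonal entries with
     global column less than diag, then the diagonal-block entries, then the remaining off-diagonal
     entries; the three PetscMemcpy() calls below copy those segments into B, A, and B respectively */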
461 
462   /* find size of row to the left of the diagonal part */
463   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
464   row  = row - diag;
465   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
466     if (garray[b->j[b->i[row]+l]] > diag) break;
467   }
468   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
469 
470   /* diagonal part */
471   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
472 
473   /* right of diagonal part */
474   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatSetValues_MPIAIJ"
480 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
481 {
482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
483   PetscScalar    value;
484   PetscErrorCode ierr;
485   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
486   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
487   PetscBool      roworiented = aij->roworiented;
488 
489   /* Some Variables required in the macro */
490   Mat        A                 = aij->A;
491   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
492   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
493   MatScalar  *aa               = a->a;
494   PetscBool  ignorezeroentries = a->ignorezeroentries;
495   Mat        B                 = aij->B;
496   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
497   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
498   MatScalar  *ba               = b->a;
499 
500   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
501   PetscInt  nonew;
502   MatScalar *ap1,*ap2;
503 
504   PetscFunctionBegin;
505   if (v) PetscValidScalarPointer(v,6);
506   for (i=0; i<m; i++) {
507     if (im[i] < 0) continue;
508 #if defined(PETSC_USE_DEBUG)
509     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
510 #endif
511     if (im[i] >= rstart && im[i] < rend) {
512       row      = im[i] - rstart;
513       lastcol1 = -1;
514       rp1      = aj + ai[row];
515       ap1      = aa + ai[row];
516       rmax1    = aimax[row];
517       nrow1    = ailen[row];
518       low1     = 0;
519       high1    = nrow1;
520       lastcol2 = -1;
521       rp2      = bj + bi[row];
522       ap2      = ba + bi[row];
523       rmax2    = bimax[row];
524       nrow2    = bilen[row];
525       low2     = 0;
526       high2    = nrow2;
527 
528       for (j=0; j<n; j++) {
529         if (v) {
530           if (roworiented) value = v[i*n+j];
531           else             value = v[i+j*m];
532         } else value = 0.0;
533         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
534         if (in[j] >= cstart && in[j] < cend) {
535           col   = in[j] - cstart;
536           nonew = a->nonew;
537           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
538         } else if (in[j] < 0) continue;
539 #if defined(PETSC_USE_DEBUG)
540         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
541 #endif
542         else {
543           if (mat->was_assembled) {
544             if (!aij->colmap) {
545               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
546             }
547 #if defined(PETSC_USE_CTABLE)
548             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
549             col--;
550 #else
551             col = aij->colmap[in[j]] - 1;
552 #endif
553             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
554               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
555               col  =  in[j];
556               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
557               B     = aij->B;
558               b     = (Mat_SeqAIJ*)B->data;
559               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
560               rp2   = bj + bi[row];
561               ap2   = ba + bi[row];
562               rmax2 = bimax[row];
563               nrow2 = bilen[row];
564               low2  = 0;
565               high2 = nrow2;
566               bm    = aij->B->rmap->n;
567               ba    = b->a;
568             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
569           } else col = in[j];
570           nonew = b->nonew;
571           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
572         }
573       }
574     } else {
575       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
576       if (!aij->donotstash) {
577         mat->assembled = PETSC_FALSE;
578         if (roworiented) {
579           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
580         } else {
581           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
582         }
583       }
584     }
585   }
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatGetValues_MPIAIJ"
591 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
592 {
593   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
594   PetscErrorCode ierr;
595   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
596   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
597 
598   PetscFunctionBegin;
599   for (i=0; i<m; i++) {
600     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
601     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
602     if (idxm[i] >= rstart && idxm[i] < rend) {
603       row = idxm[i] - rstart;
604       for (j=0; j<n; j++) {
605         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
606         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
607         if (idxn[j] >= cstart && idxn[j] < cend) {
608           col  = idxn[j] - cstart;
609           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
610         } else {
611           if (!aij->colmap) {
612             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
613           }
614 #if defined(PETSC_USE_CTABLE)
615           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
616           col--;
617 #else
618           col = aij->colmap[idxn[j]] - 1;
619 #endif
620           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
621           else {
622             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
623           }
624         }
625       }
626     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
627   }
628   PetscFunctionReturn(0);
629 }
630 
631 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
632 
633 #undef __FUNCT__
634 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
635 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
636 {
637   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
638   PetscErrorCode ierr;
639   PetscInt       nstash,reallocs;
640   InsertMode     addv;
641 
642   PetscFunctionBegin;
643   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
644 
645   /* make sure all processors are either in INSERTMODE or ADDMODE */
646   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
647   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
648   mat->insertmode = addv; /* in case this processor had no cache */
649 
650   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
651   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
652   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
653   PetscFunctionReturn(0);
654 }
655 
656 #undef __FUNCT__
657 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
658 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
659 {
660   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
661   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
662   PetscErrorCode ierr;
663   PetscMPIInt    n;
664   PetscInt       i,j,rstart,ncols,flg;
665   PetscInt       *row,*col;
666   PetscBool      other_disassembled;
667   PetscScalar    *val;
668   InsertMode     addv = mat->insertmode;
669 
670   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
671 
672   PetscFunctionBegin;
673   if (!aij->donotstash && !mat->nooffprocentries) {
674     while (1) {
675       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
676       if (!flg) break;
677 
678       for (i=0; i<n; ) {
679         /* Now identify the consecutive vals belonging to the same row */
680         for (j=i,rstart=row[j]; j<n; j++) {
681           if (row[j] != rstart) break;
682         }
683         if (j < n) ncols = j-i;
684         else       ncols = n-i;
685         /* Now assemble all these values with a single function call */
686         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
687 
688         i = j;
689       }
690     }
691     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
692   }
693   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
694   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
695 
696   /* determine if any processor has disassembled, if so we must
697      also disassemble ourselves, in order that we may reassemble. */
698   /*
699      if nonzero structure of submatrix B cannot change then we know that
700      no processor disassembled thus we can skip this stuff
701   */
702   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
703     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
704     if (mat->was_assembled && !other_disassembled) {
705       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
706     }
707   }
708   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
709     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
710   }
711   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
712   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
714   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
715 
716   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
717 
718   aij->rowvalues = 0;
719 
720   /* used by MatAXPY() */
721   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
722   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
723 
724   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
725   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
731 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
732 {
733   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
734   PetscErrorCode ierr;
735 
736   PetscFunctionBegin;
737   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
738   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatZeroRows_MPIAIJ"
744 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
745 {
746   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
749   PetscInt          i,*owners = A->rmap->range;
750   PetscInt          *nprocs,j,idx,nsends,row;
751   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
752   PetscInt          *rvalues,count,base,slen,*source;
753   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
754   MPI_Comm          comm;
755   MPI_Request       *send_waits,*recv_waits;
756   MPI_Status        recv_status,*send_status;
757   const PetscScalar *xx;
758   PetscScalar       *bb;
759 #if defined(PETSC_DEBUG)
760   PetscBool found = PETSC_FALSE;
761 #endif
762 
763   PetscFunctionBegin;
764   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
765   /*  first count number of contributors to each processor */
766   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
767   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
768   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
769   j    = 0;
770   for (i=0; i<N; i++) {
771     if (lastidx > (idx = rows[i])) j = 0;
772     lastidx = idx;
773     for (; j<size; j++) {
774       if (idx >= owners[j] && idx < owners[j+1]) {
775         nprocs[2*j]++;
776         nprocs[2*j+1] = 1;
777         owner[i]      = j;
778 #if defined(PETSC_DEBUG)
779         found = PETSC_TRUE;
780 #endif
781         break;
782       }
783     }
784 #if defined(PETSC_DEBUG)
785     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
786     found = PETSC_FALSE;
787 #endif
788   }
789   nsends = 0;
790   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
791 
792   if (A->nooffproczerorows) {
793     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
794     nrecvs = nsends;
795     nmax   = N;
796   } else {
797     /* inform other processors of number of messages and max length*/
798     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
799   }
800 
801   /* post receives:   */
802   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
803   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
804   for (i=0; i<nrecvs; i++) {
805     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
806   }
807 
808   /* do sends:
809       1) starts[i] gives the starting index in svalues for stuff going to
810          the ith processor
811   */
812   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
813   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
814   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
815 
816   starts[0] = 0;
817   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
818   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
819 
820   starts[0] = 0;
821   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
822   count = 0;
823   for (i=0; i<size; i++) {
824     if (nprocs[2*i+1]) {
825       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
826     }
827   }
828   ierr = PetscFree(starts);CHKERRQ(ierr);
829 
830   base = owners[rank];
831 
832   /*  wait on receives */
833   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
834   count = nrecvs; slen = 0;
835   while (count) {
836     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
837     /* unpack receives into our local space */
838     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
839 
840     source[imdex] = recv_status.MPI_SOURCE;
841     lens[imdex]   = n;
842     slen         += n;
843     count--;
844   }
845   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
846 
847   /* move the data into the send scatter */
848   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
849   count = 0;
850   for (i=0; i<nrecvs; i++) {
851     values = rvalues + i*nmax;
852     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
853   }
854   ierr = PetscFree(rvalues);CHKERRQ(ierr);
855   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
856   ierr = PetscFree(owner);CHKERRQ(ierr);
857   ierr = PetscFree(nprocs);CHKERRQ(ierr);
858 
859   /* fix right hand side if needed */
860   if (x && b) {
861     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
862     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
863     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
864     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
865     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
866   }
867   /*
868         Zero the required rows. If the "diagonal block" of the matrix
869      is square and the user wishes to set the diagonal we use separate
870      code so that MatSetValues() is not called for each diagonal entry, which would
871      allocate new memory with many mallocs and slow things down.
872 
873   */
874   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
875   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
876   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
877     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
878   } else if (diag != 0.0) {
879     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
880     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
881     for (i = 0; i < slen; i++) {
882       row  = lrows[i] + rstart;
883       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
884     }
885     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
886     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887   } else {
888     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
889   }
890   ierr = PetscFree(lrows);CHKERRQ(ierr);
891 
892   /* wait on sends */
893   if (nsends) {
894     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
895     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
896     ierr = PetscFree(send_status);CHKERRQ(ierr);
897   }
898   ierr = PetscFree(send_waits);CHKERRQ(ierr);
899   ierr = PetscFree(svalues);CHKERRQ(ierr);
900   PetscFunctionReturn(0);
901 }
902 
903 #undef __FUNCT__
904 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
905 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
906 {
907   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
908   PetscErrorCode    ierr;
909   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
910   PetscInt          i,*owners = A->rmap->range;
911   PetscInt          *nprocs,j,idx,nsends;
912   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
913   PetscInt          *rvalues,count,base,slen,*source;
914   PetscInt          *lens,*lrows,*values,m;
915   MPI_Comm          comm;
916   MPI_Request       *send_waits,*recv_waits;
917   MPI_Status        recv_status,*send_status;
918   const PetscScalar *xx;
919   PetscScalar       *bb,*mask;
920   Vec               xmask,lmask;
921   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
922   const PetscInt    *aj, *ii,*ridx;
923   PetscScalar       *aa;
924 #if defined(PETSC_DEBUG)
925   PetscBool found = PETSC_FALSE;
926 #endif
927 
928   PetscFunctionBegin;
929   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
930   /*  first count number of contributors to each processor */
931   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
932   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
933   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
934   j    = 0;
935   for (i=0; i<N; i++) {
936     if (lastidx > (idx = rows[i])) j = 0;
937     lastidx = idx;
938     for (; j<size; j++) {
939       if (idx >= owners[j] && idx < owners[j+1]) {
940         nprocs[2*j]++;
941         nprocs[2*j+1] = 1;
942         owner[i]      = j;
943 #if defined(PETSC_DEBUG)
944         found = PETSC_TRUE;
945 #endif
946         break;
947       }
948     }
949 #if defined(PETSC_DEBUG)
950     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
951     found = PETSC_FALSE;
952 #endif
953   }
954   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
955 
956   /* inform other processors of number of messages and max length*/
957   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
958 
959   /* post receives:   */
960   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
961   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
962   for (i=0; i<nrecvs; i++) {
963     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
964   }
965 
966   /* do sends:
967       1) starts[i] gives the starting index in svalues for stuff going to
968          the ith processor
969   */
970   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
971   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
972   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
973 
974   starts[0] = 0;
975   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
976   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
977 
978   starts[0] = 0;
979   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
980   count = 0;
981   for (i=0; i<size; i++) {
982     if (nprocs[2*i+1]) {
983       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
984     }
985   }
986   ierr = PetscFree(starts);CHKERRQ(ierr);
987 
988   base = owners[rank];
989 
990   /*  wait on receives */
991   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
992   count = nrecvs; slen = 0;
993   while (count) {
994     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
995     /* unpack receives into our local space */
996     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
997 
998     source[imdex] = recv_status.MPI_SOURCE;
999     lens[imdex]   = n;
1000     slen         += n;
1001     count--;
1002   }
1003   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1004 
1005   /* move the data into the send scatter */
1006   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1007   count = 0;
1008   for (i=0; i<nrecvs; i++) {
1009     values = rvalues + i*nmax;
1010     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1011   }
1012   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1013   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1014   ierr = PetscFree(owner);CHKERRQ(ierr);
1015   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1016   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1017 
1018   /* zero diagonal part of matrix */
1019   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1020 
1021   /* handle off diagonal part of matrix */
1022   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1023   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1024   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1025   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1026   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1027   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1030   if (x) {
1031     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1034     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1035   }
1036   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1037 
1038   /* remove zeroed rows of off diagonal matrix */
1039   ii = aij->i;
1040   for (i=0; i<slen; i++) {
1041     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1042   }
1043 
1044   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1045   if (aij->compressedrow.use) {
1046     m    = aij->compressedrow.nrows;
1047     ii   = aij->compressedrow.i;
1048     ridx = aij->compressedrow.rindex;
1049     for (i=0; i<m; i++) {
1050       n  = ii[i+1] - ii[i];
1051       aj = aij->j + ii[i];
1052       aa = aij->a + ii[i];
1053 
1054       for (j=0; j<n; j++) {
1055         if (PetscAbsScalar(mask[*aj])) {
1056           if (b) bb[*ridx] -= *aa*xx[*aj];
1057           *aa = 0.0;
1058         }
1059         aa++;
1060         aj++;
1061       }
1062       ridx++;
1063     }
1064   } else { /* do not use compressed row format */
1065     m = l->B->rmap->n;
1066     for (i=0; i<m; i++) {
1067       n  = ii[i+1] - ii[i];
1068       aj = aij->j + ii[i];
1069       aa = aij->a + ii[i];
1070       for (j=0; j<n; j++) {
1071         if (PetscAbsScalar(mask[*aj])) {
1072           if (b) bb[i] -= *aa*xx[*aj];
1073           *aa = 0.0;
1074         }
1075         aa++;
1076         aj++;
1077       }
1078     }
1079   }
1080   if (x) {
1081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1082     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1083   }
1084   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1085   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1086   ierr = PetscFree(lrows);CHKERRQ(ierr);
1087 
1088   /* wait on sends */
1089   if (nsends) {
1090     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1091     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1092     ierr = PetscFree(send_status);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1095   ierr = PetscFree(svalues);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatMult_MPIAIJ"
1101 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1102 {
1103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1104   PetscErrorCode ierr;
1105   PetscInt       nt;
1106 
1107   PetscFunctionBegin;
1108   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1109   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1110   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1111   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1112   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1113   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1119 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1120 {
1121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1122   PetscErrorCode ierr;
1123 
1124   PetscFunctionBegin;
1125   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1131 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1132 {
1133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1134   PetscErrorCode ierr;
1135 
1136   PetscFunctionBegin;
1137   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1138   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1139   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1140   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 #undef __FUNCT__
1145 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1146 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1147 {
1148   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1149   PetscErrorCode ierr;
1150   PetscBool      merged;
1151 
1152   PetscFunctionBegin;
1153   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1154   /* do nondiagonal part */
1155   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1156   if (!merged) {
1157     /* send it on its way */
1158     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159     /* do local part */
1160     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1161     /* receive remote parts: note this assumes the values are not actually */
1162     /* added into yy until the VecScatterEnd() on the next line */
1163     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1164   } else {
1165     /* do local part */
1166     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1167     /* send it on its way */
1168     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1169     /* the values were actually received in the Begin() but we still need to call this no-op */
1170     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1171   }
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 EXTERN_C_BEGIN
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1178 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1179 {
1180   MPI_Comm       comm;
1181   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1182   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1183   IS             Me,Notme;
1184   PetscErrorCode ierr;
1185   PetscInt       M,N,first,last,*notme,i;
1186   PetscMPIInt    size;
1187 
1188   PetscFunctionBegin;
1189   /* Easy test: symmetric diagonal block */
1190   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1191   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1192   if (!*f) PetscFunctionReturn(0);
1193   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1195   if (size == 1) PetscFunctionReturn(0);
1196 
1197   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1198   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1199   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1200   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1201   for (i=0; i<first; i++) notme[i] = i;
1202   for (i=last; i<M; i++) notme[i-last+first] = i;
1203   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1204   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1205   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1206   Aoff = Aoffs[0];
1207   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1208   Boff = Boffs[0];
1209   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1211   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1213   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1214   ierr = PetscFree(notme);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 EXTERN_C_END
1218 
1219 #undef __FUNCT__
1220 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1221 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1222 {
1223   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1224   PetscErrorCode ierr;
1225 
1226   PetscFunctionBegin;
1227   /* do nondiagonal part */
1228   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1229   /* send it on its way */
1230   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1231   /* do local part */
1232   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1233   /* receive remote parts */
1234   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 /*
1239   This only works correctly for square matrices where the subblock A->A is the
1240    diagonal block
1241 */
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1244 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1245 {
1246   PetscErrorCode ierr;
1247   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1248 
1249   PetscFunctionBegin;
1250   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1251   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1252   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNCT__
1257 #define __FUNCT__ "MatScale_MPIAIJ"
1258 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1259 {
1260   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1261   PetscErrorCode ierr;
1262 
1263   PetscFunctionBegin;
1264   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1265   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatDestroy_MPIAIJ"
1271 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1272 {
1273   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1274   PetscErrorCode ierr;
1275 
1276   PetscFunctionBegin;
1277 #if defined(PETSC_USE_LOG)
1278   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1279 #endif
1280   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1281   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1282   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1283   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1284 #if defined(PETSC_USE_CTABLE)
1285   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1286 #else
1287   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1288 #endif
1289   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1290   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1291   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1292   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1293   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1294   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1295 
1296   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",NULL);CHKERRQ(ierr);
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1311 {
1312   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1313   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1314   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1315   PetscErrorCode ierr;
1316   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1317   int            fd;
1318   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1319   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1320   PetscScalar    *column_values;
1321   PetscInt       message_count,flowcontrolcount;
1322   FILE           *file;
1323 
1324   PetscFunctionBegin;
1325   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1326   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1327   nz   = A->nz + B->nz;
1328   if (!rank) {
1329     header[0] = MAT_FILE_CLASSID;
1330     header[1] = mat->rmap->N;
1331     header[2] = mat->cmap->N;
1332 
1333     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1334     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1335     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1336     /* get largest number of rows any processor has */
1337     rlen  = mat->rmap->n;
1338     range = mat->rmap->range;
1339     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1340   } else {
1341     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1342     rlen = mat->rmap->n;
1343   }
1344 
1345   /* load up the local row counts */
1346   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1347   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1348 
1349   /* store the row lengths to the file */
1350   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1351   if (!rank) {
1352     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1353     for (i=1; i<size; i++) {
1354       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1355       rlen = range[i+1] - range[i];
1356       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1357       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1358     }
1359     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1360   } else {
1361     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1362     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1363     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1364   }
1365   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1366 
1367   /* load up the local column indices */
1368   nzmax = nz; /* this processor needs as much space as the largest processor needs */
1369   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1370   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1371   cnt   = 0;
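  /*
     For each local row, emit the global column indices in ascending order: first the
     off-diagonal (B) entries whose global column lies before this process's diagonal
     block, then the diagonal (A) entries shifted by cstart, then the remaining
     off-diagonal entries.  garray[] maps B's compressed local column numbers back to
     global columns.
  */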
1372   for (i=0; i<mat->rmap->n; i++) {
1373     for (j=B->i[i]; j<B->i[i+1]; j++) {
1374       if ((col = garray[B->j[j]]) > cstart) break;
1375       column_indices[cnt++] = col;
1376     }
1377     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1378     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1379   }
1380   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1381 
1382   /* store the column indices to the file */
1383   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1384   if (!rank) {
1385     MPI_Status status;
1386     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1387     for (i=1; i<size; i++) {
1388       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1389       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1390       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1391       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1392       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1393     }
1394     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1395   } else {
1396     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1397     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1399     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1400   }
1401   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1402 
1403   /* load up the local column values */
1404   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1405   cnt  = 0;
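  /* gather the values in exactly the same merged (ascending global column) order as the indices above */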
1406   for (i=0; i<mat->rmap->n; i++) {
1407     for (j=B->i[i]; j<B->i[i+1]; j++) {
1408       if (garray[B->j[j]] > cstart) break;
1409       column_values[cnt++] = B->a[j];
1410     }
1411     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1412     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1413   }
1414   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1415 
1416   /* store the column values to the file */
1417   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1418   if (!rank) {
1419     MPI_Status status;
1420     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1421     for (i=1; i<size; i++) {
1422       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1423       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1424       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1425       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1426       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1427     }
1428     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1429   } else {
1430     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1431     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1432     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1433     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1434   }
1435   ierr = PetscFree(column_values);CHKERRQ(ierr);
1436 
1437   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1438   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1439   PetscFunctionReturn(0);
1440 }
1441 
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1445 {
1446   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1447   PetscErrorCode    ierr;
1448   PetscMPIInt       rank = aij->rank,size = aij->size;
1449   PetscBool         isdraw,iascii,isbinary;
1450   PetscViewer       sviewer;
1451   PetscViewerFormat format;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1457   if (iascii) {
1458     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1459     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1460       MatInfo   info;
1461       PetscBool inodes;
1462 
1463       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1464       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1465       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1467       if (!inodes) {
1468         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1469                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1472                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1473       }
1474       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1475       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1476       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1478       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1480       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1481       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1482       PetscFunctionReturn(0);
1483     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1484       PetscInt inodecount,inodelimit,*inodes;
1485       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1486       if (inodes) {
1487         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1488       } else {
1489         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1490       }
1491       PetscFunctionReturn(0);
1492     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493       PetscFunctionReturn(0);
1494     }
1495   } else if (isbinary) {
1496     if (size == 1) {
1497       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499     } else {
1500       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1501     }
1502     PetscFunctionReturn(0);
1503   } else if (isdraw) {
1504     PetscDraw draw;
1505     PetscBool isnull;
1506     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1507     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1508   }
1509 
1510   if (size == 1) {
1511     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1512     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1513   } else {
1514     /* assemble the entire matrix onto first processor. */
1515     Mat        A;
1516     Mat_SeqAIJ *Aloc;
1517     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1518     MatScalar  *a;
1519 
1520     if (mat->rmap->N > 1024) {
1521       PetscBool flg = PETSC_FALSE;
1522 
1523       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1524       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1525     }
1526 
1527     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1528     if (!rank) {
1529       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1530     } else {
1531       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1532     }
1533     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1534     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1535     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1536     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1537     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1538 
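    /*
       The diagonal-block column indices are temporarily shifted to global numbering
       (+= cmap->rstart) so they can be passed to MatSetValues(), and shifted back
       afterwards; the off-diagonal block is translated through garray[] instead.
    */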
1539     /* copy over the A part */
1540     Aloc = (Mat_SeqAIJ*)aij->A->data;
1541     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1544     for (i=0; i<m; i++) {
1545       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1546       row++;
1547       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1548     }
1549     aj = Aloc->j;
1550     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1551 
1552     /* copy over the B part */
1553     Aloc = (Mat_SeqAIJ*)aij->B->data;
1554     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555     row  = mat->rmap->rstart;
1556     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1557     ct   = cols;
1558     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1559     for (i=0; i<m; i++) {
1560       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1561       row++;
1562       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1563     }
1564     ierr = PetscFree(ct);CHKERRQ(ierr);
1565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567     /*
1568        Everyone has to call to draw the matrix since the graphics waits are
1569        synchronized across all processors that share the PetscDraw object
1570     */
1571     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1572     if (!rank) {
1573       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1574       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1575       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1576       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1577     }
1578     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1579     ierr = MatDestroy(&A);CHKERRQ(ierr);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatView_MPIAIJ"
1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1587 {
1588   PetscErrorCode ierr;
1589   PetscBool      iascii,isdraw,issocket,isbinary;
1590 
1591   PetscFunctionBegin;
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1596   if (iascii || isdraw || isbinary || issocket) {
1597     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatSOR_MPIAIJ"
1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1605 {
1606   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1607   PetscErrorCode ierr;
1608   Vec            bb1 = 0;
1609   PetscBool      hasop;
1610 
1611   PetscFunctionBegin;
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1618     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1619   }
1620 
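  /*
     Only process-local (block Jacobi style) SOR is implemented: in each outer
     iteration the off-process coupling is folded into the right-hand side,
     bb1 = bb - B*lvec, and the sequential SOR kernel is applied to the local
     diagonal block A.
  */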
1621   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626 
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1655     if (flag & SOR_ZERO_INITIAL_GUESS) {
1656       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1657       its--;
1658     }
1659     while (its--) {
1660       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662 
1663       /* update rhs: bb1 = bb - B*x */
1664       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1665       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1666 
1667       /* local sweep */
1668       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1669     }
1670   } else if (flag & SOR_EISENSTAT) {
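    /*
       Eisenstat's trick: a local backward sweep produces xx; the right-hand side is
       then modified as bb1 = bb + ((omega-2)/omega)*D*xx + B*lvec, and a local
       forward sweep on the modified system yields a correction xx1 that is added
       to xx.
    */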
1671     Vec xx1;
1672 
1673     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1675 
1676     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     if (!mat->diag) {
1679       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1680       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1681     }
1682     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1683     if (hasop) {
1684       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1685     } else {
1686       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1687     }
1688     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1689 
1690     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1691 
1692     /* local sweep */
1693     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1694     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1695     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1696   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1697 
1698   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatPermute_MPIAIJ"
1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1705 {
1706   Mat            aA,aB,Aperm;
1707   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1708   PetscScalar    *aa,*ba;
1709   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1710   PetscSF        rowsf,sf;
1711   IS             parcolp = NULL;
1712   PetscBool      done;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
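  /*
     Overview: the row and column permutations are inverted with PetscSF reductions
     to find where each locally owned row/column goes, the permuted diagonal and
     off-diagonal nonzero counts are computed and broadcast to the owners of the
     destination rows for preallocation, and finally the entries are inserted into
     the new matrix with MatSetValues() using the permuted global indices.
  */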
1716   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1717   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1718   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1719   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1720 
1721   /* Invert row permutation to find out where my rows should go */
1722   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1723   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1724   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1725   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1726   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1727   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1728 
1729   /* Invert column permutation to find out where my columns should go */
1730   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1731   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1732   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1733   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1734   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1735   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1736   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1737 
1738   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1739   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1740   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1741 
1742   /* Find out where my gcols should go */
1743   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1744   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1745   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1746   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1747   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1748   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1749   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1750   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1751 
1752   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1753   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1756   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1757   for (i=0; i<m; i++) {
1758     PetscInt row = rdest[i],rowner;
1759     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1760     for (j=ai[i]; j<ai[i+1]; j++) {
1761       PetscInt cowner,col = cdest[aj[j]];
1762       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1763       if (rowner == cowner) dnnz[i]++;
1764       else onnz[i]++;
1765     }
1766     for (j=bi[i]; j<bi[i+1]; j++) {
1767       PetscInt cowner,col = gcdest[bj[j]];
1768       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1769       if (rowner == cowner) dnnz[i]++;
1770       else onnz[i]++;
1771     }
1772   }
1773   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1774   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1778   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1779   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1780 
1781   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1782   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1783   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1784   for (i=0; i<m; i++) {
1785     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1786     PetscInt rowlen;
1787     rowlen = ai[i+1] - ai[i];
1788     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1789     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1790     rowlen = bi[i+1] - bi[i];
1791     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1792     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1793   }
1794   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1797   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1798   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1799   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1800   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1801   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1802   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1803   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1804   *B = Aperm;
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1811 {
1812   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1813   Mat            A    = mat->A,B = mat->B;
1814   PetscErrorCode ierr;
1815   PetscReal      isend[5],irecv[5];
1816 
1817   PetscFunctionBegin;
1818   info->block_size = 1.0;
1819   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1820 
1821   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1822   isend[3] = info->memory;  isend[4] = info->mallocs;
1823 
1824   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1825 
1826   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1827   isend[3] += info->memory;  isend[4] += info->mallocs;
1828   if (flag == MAT_LOCAL) {
1829     info->nz_used      = isend[0];
1830     info->nz_allocated = isend[1];
1831     info->nz_unneeded  = isend[2];
1832     info->memory       = isend[3];
1833     info->mallocs      = isend[4];
1834   } else if (flag == MAT_GLOBAL_MAX) {
1835     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1836 
1837     info->nz_used      = irecv[0];
1838     info->nz_allocated = irecv[1];
1839     info->nz_unneeded  = irecv[2];
1840     info->memory       = irecv[3];
1841     info->mallocs      = irecv[4];
1842   } else if (flag == MAT_GLOBAL_SUM) {
1843     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1844 
1845     info->nz_used      = irecv[0];
1846     info->nz_allocated = irecv[1];
1847     info->nz_unneeded  = irecv[2];
1848     info->memory       = irecv[3];
1849     info->mallocs      = irecv[4];
1850   }
1851   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1852   info->fill_ratio_needed = 0;
1853   info->factor_mallocs    = 0;
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSetOption_MPIAIJ"
1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1860 {
1861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   switch (op) {
1866   case MAT_NEW_NONZERO_LOCATIONS:
1867   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1868   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1869   case MAT_KEEP_NONZERO_PATTERN:
1870   case MAT_NEW_NONZERO_LOCATION_ERR:
1871   case MAT_USE_INODES:
1872   case MAT_IGNORE_ZERO_ENTRIES:
1873     MatCheckPreallocated(A,1);
1874     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1875     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1876     break;
1877   case MAT_ROW_ORIENTED:
1878     a->roworiented = flg;
1879 
1880     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1881     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1882     break;
1883   case MAT_NEW_DIAGONALS:
1884     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1885     break;
1886   case MAT_IGNORE_OFF_PROC_ENTRIES:
1887     a->donotstash = flg;
1888     break;
1889   case MAT_SPD:
1890     A->spd_set = PETSC_TRUE;
1891     A->spd     = flg;
1892     if (flg) {
1893       A->symmetric                  = PETSC_TRUE;
1894       A->structurally_symmetric     = PETSC_TRUE;
1895       A->symmetric_set              = PETSC_TRUE;
1896       A->structurally_symmetric_set = PETSC_TRUE;
1897     }
1898     break;
1899   case MAT_SYMMETRIC:
1900     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1901     break;
1902   case MAT_STRUCTURALLY_SYMMETRIC:
1903     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1904     break;
1905   case MAT_HERMITIAN:
1906     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1907     break;
1908   case MAT_SYMMETRY_ETERNAL:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   default:
1912     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatGetRow_MPIAIJ"
1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1920 {
1921   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1922   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1923   PetscErrorCode ierr;
1924   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1925   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1926   PetscInt       *cmap,*idx_p;
1927 
1928   PetscFunctionBegin;
1929   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1930   mat->getrowactive = PETSC_TRUE;
1931 
1932   if (!mat->rowvalues && (idx || v)) {
1933     /*
1934         allocate enough space to hold information from the longest row.
1935     */
1936     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1937     PetscInt   max = 1,tmp;
1938     for (i=0; i<matin->rmap->n; i++) {
1939       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1940       if (max < tmp) max = tmp;
1941     }
1942     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1943   }
1944 
1945   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1946   lrow = row - rstart;
1947 
1948   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1949   if (!v)   {pvA = 0; pvB = 0;}
1950   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1951   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1952   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1953   nztot = nzA + nzB;
1954 
1955   cmap = mat->garray;
1956   if (v  || idx) {
1957     if (nztot) {
1958       /* Sort by increasing column numbers, assuming A and B already sorted */
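      /* B's entries with global column < cstart come first, then all of A's (diagonal block) entries, then the rest of B's */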
1959       PetscInt imark = -1;
1960       if (v) {
1961         *v = v_p = mat->rowvalues;
1962         for (i=0; i<nzB; i++) {
1963           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1964           else break;
1965         }
1966         imark = i;
1967         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1968         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1969       }
1970       if (idx) {
1971         *idx = idx_p = mat->rowindices;
1972         if (imark > -1) {
1973           for (i=0; i<imark; i++) {
1974             idx_p[i] = cmap[cworkB[i]];
1975           }
1976         } else {
1977           for (i=0; i<nzB; i++) {
1978             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1979             else break;
1980           }
1981           imark = i;
1982         }
1983         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1984         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1985       }
1986     } else {
1987       if (idx) *idx = 0;
1988       if (v)   *v   = 0;
1989     }
1990   }
1991   *nz  = nztot;
1992   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1993   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2000 {
2001   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2002 
2003   PetscFunctionBegin;
2004   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2005   aij->getrowactive = PETSC_FALSE;
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatNorm_MPIAIJ"
2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2012 {
2013   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2014   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2015   PetscErrorCode ierr;
2016   PetscInt       i,j,cstart = mat->cmap->rstart;
2017   PetscReal      sum = 0.0;
2018   MatScalar      *v;
2019 
2020   PetscFunctionBegin;
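  /*
     NORM_FROBENIUS: sum |a_ij|^2 over both local blocks, MPI_Allreduce(SUM), then sqrt.
     NORM_1:         per-column absolute sums accumulated into a global-length array,
                     reduced with SUM, then the maximum column sum is taken.
     NORM_INFINITY:  per-row absolute sums over A and B, reduced with MAX.
  */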
2021   if (aij->size == 1) {
2022     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2023   } else {
2024     if (type == NORM_FROBENIUS) {
2025       v = amat->a;
2026       for (i=0; i<amat->nz; i++) {
2027         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2028       }
2029       v = bmat->a;
2030       for (i=0; i<bmat->nz; i++) {
2031         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2032       }
2033       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2034       *norm = PetscSqrtReal(*norm);
2035     } else if (type == NORM_1) { /* max column norm */
2036       PetscReal *tmp,*tmp2;
2037       PetscInt  *jj,*garray = aij->garray;
2038       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2039       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2040       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2041       *norm = 0.0;
2042       v     = amat->a; jj = amat->j;
2043       for (j=0; j<amat->nz; j++) {
2044         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2045       }
2046       v = bmat->a; jj = bmat->j;
2047       for (j=0; j<bmat->nz; j++) {
2048         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2049       }
2050       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2051       for (j=0; j<mat->cmap->N; j++) {
2052         if (tmp2[j] > *norm) *norm = tmp2[j];
2053       }
2054       ierr = PetscFree(tmp);CHKERRQ(ierr);
2055       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2056     } else if (type == NORM_INFINITY) { /* max row norm */
2057       PetscReal ntemp = 0.0;
2058       for (j=0; j<aij->A->rmap->n; j++) {
2059         v   = amat->a + amat->i[j];
2060         sum = 0.0;
2061         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2062           sum += PetscAbsScalar(*v); v++;
2063         }
2064         v = bmat->a + bmat->i[j];
2065         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2066           sum += PetscAbsScalar(*v); v++;
2067         }
2068         if (sum > ntemp) ntemp = sum;
2069       }
2070       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2071     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for the two norm");
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatTranspose_MPIAIJ"
2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2079 {
2080   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2081   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2082   PetscErrorCode ierr;
2083   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2084   PetscInt       cstart = A->cmap->rstart,ncol;
2085   Mat            B;
2086   MatScalar      *array;
2087 
2088   PetscFunctionBegin;
2089   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2090 
2091   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2092   ai = Aloc->i; aj = Aloc->j;
2093   bi = Bloc->i; bj = Bloc->j;
2094   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2095     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2096     PetscSFNode          *oloc;
2097     PETSC_UNUSED PetscSF sf;
2098 
2099     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2100     /* compute d_nnz for preallocation */
2101     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2102     for (i=0; i<ai[ma]; i++) {
2103       d_nnz[aj[i]]++;
2104       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2105     }
2106     /* compute local off-diagonal contributions */
2107     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2108     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2109     /* map those to global */
2110     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2111     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2112     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2113     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2114     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2115     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2116     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2117 
2118     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2119     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2120     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2121     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2122     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2123     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2124   } else {
2125     B    = *matout;
2126     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2127     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2128   }
2129 
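  /*
     The transpose is assembled by inserting each local row of the diagonal block
     (and of the off-diagonal block, with columns mapped through garray) as a column
     of the result: MatSetValues() is called with ncol row indices and a single
     column index equal to the original global row number.
  */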
2130   /* copy over the A part */
2131   array = Aloc->a;
2132   row   = A->rmap->rstart;
2133   for (i=0; i<ma; i++) {
2134     ncol = ai[i+1]-ai[i];
2135     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2136     row++;
2137     array += ncol; aj += ncol;
2138   }
2139   aj = Aloc->j;
2140   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2141 
2142   /* copy over the B part */
2143   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2144   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2145   array = Bloc->a;
2146   row   = A->rmap->rstart;
2147   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2148   cols_tmp = cols;
2149   for (i=0; i<mb; i++) {
2150     ncol = bi[i+1]-bi[i];
2151     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2152     row++;
2153     array += ncol; cols_tmp += ncol;
2154   }
2155   ierr = PetscFree(cols);CHKERRQ(ierr);
2156 
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2160     *matout = B;
2161   } else {
2162     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2170 {
2171   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2172   Mat            a    = aij->A,b = aij->B;
2173   PetscErrorCode ierr;
2174   PetscInt       s1,s2,s3;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2178   if (rr) {
2179     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2180     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2181     /* Overlap communication with computation. */
2182     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2183   }
2184   if (ll) {
2185     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2186     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2187     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2188   }
2189   /* scale the diagonal block */
2190   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2191 
2192   if (rr) {
2193     /* Do a scatter end and then right scale the off-diagonal block */
2194     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2195     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatEqual_MPIAIJ"
2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2215 {
2216   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2217   Mat            a,b,c,d;
2218   PetscBool      flg;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   a = matA->A; b = matA->B;
2223   c = matB->A; d = matB->B;
2224 
2225   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2226   if (flg) {
2227     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2228   }
2229   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatCopy_MPIAIJ"
2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2239   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2240 
2241   PetscFunctionBegin;
2242   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2243   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2244     /* Because of the column compression in the off-processor part of the matrix a->B,
2245        the number of columns in a->B and b->B may be different, hence we cannot call
2246        MatCopy() directly on the two parts. If need be, a copy more efficient than
2247        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2248        then copying the submatrices */
2249     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2250   } else {
2251     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2252     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatSetUp_MPIAIJ"
2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2272 {
2273   PetscInt       i,m=Y->rmap->N;
2274   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2275   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2276   const PetscInt *xi = x->i,*yi = y->i;
2277 
2278   PetscFunctionBegin;
2279   /* Set the number of nonzeros in the new matrix */
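  /*
     For each row, merge the (sorted) global column indices of X and Y with a
     two-pointer sweep, counting each distinct column once; yltog/xltog map the
     compressed local column numbers of the off-diagonal blocks to global columns.
  */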
2280   for (i=0; i<m; i++) {
2281     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2282     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2283     nnz[i] = 0;
2284     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2285       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2286       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2287       nnz[i]++;
2288     }
2289     for (; k<nzy; k++) nnz[i]++;
2290   }
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPY_MPIAIJ"
2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2297 {
2298   PetscErrorCode ierr;
2299   PetscInt       i;
2300   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2301   PetscBLASInt   bnz,one=1;
2302   Mat_SeqAIJ     *x,*y;
2303 
2304   PetscFunctionBegin;
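  /*
     Three cases: identical nonzero patterns allow a direct BLAS axpy on the stored
     values of both blocks; a subset pattern maps X's entries into Y through the
     cached xtoy index; otherwise a new matrix with the merged pattern is
     preallocated, filled, and swapped into Y's header.
  */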
2305   if (str == SAME_NONZERO_PATTERN) {
2306     PetscScalar alpha = a;
2307     x    = (Mat_SeqAIJ*)xx->A->data;
2308     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2309     y    = (Mat_SeqAIJ*)yy->A->data;
2310     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2311     x    = (Mat_SeqAIJ*)xx->B->data;
2312     y    = (Mat_SeqAIJ*)yy->B->data;
2313     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2314     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2315   } else if (str == SUBSET_NONZERO_PATTERN) {
2316     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2317 
2318     x = (Mat_SeqAIJ*)xx->B->data;
2319     y = (Mat_SeqAIJ*)yy->B->data;
2320     if (y->xtoy && y->XtoY != xx->B) {
2321       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2322       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2323     }
2324     if (!y->xtoy) { /* get xtoy */
2325       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2326       y->XtoY = xx->B;
2327       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2328     }
2329     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2330   } else {
2331     Mat      B;
2332     PetscInt *nnz_d,*nnz_o;
2333     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2334     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2335     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2336     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2337     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2338     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2339     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2340     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2341     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2342     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2343     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2344     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2345     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2346     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2347   }
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatConjugate_MPIAIJ"
2355 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2356 {
2357 #if defined(PETSC_USE_COMPLEX)
2358   PetscErrorCode ierr;
2359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2363   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2364 #else
2365   PetscFunctionBegin;
2366 #endif
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatRealPart_MPIAIJ"
2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2373 {
2374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2379   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2386 {
2387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2392   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #if defined(PETSC_HAVE_PBGL)
2397 
2398 #include <boost/parallel/mpi/bsp_process_group.hpp>
2399 #include <boost/graph/distributed/ilu_default_graph.hpp>
2400 #include <boost/graph/distributed/ilu_0_block.hpp>
2401 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2402 #include <boost/graph/distributed/petsc/interface.hpp>
2403 #include <boost/multi_array.hpp>
2404 #include <boost/parallel/distributed_property_map.hpp>
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2408 /*
2409   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2410 */
2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2412 {
2413   namespace petsc = boost::distributed::petsc;
2414 
2415   namespace graph_dist = boost::graph::distributed;
2416   using boost::graph::distributed::ilu_default::process_group_type;
2417   using boost::graph::ilu_permuted;
2418 
2419   PetscBool      row_identity, col_identity;
2420   PetscContainer c;
2421   PetscInt       m, n, M, N;
2422   PetscErrorCode ierr;
2423 
2424   PetscFunctionBegin;
2425   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2426   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2427   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2428   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2429 
2430   process_group_type pg;
2431   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2432   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2433   lgraph_type& level_graph = *lgraph_p;
2434   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2435 
2436   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2437   ilu_permuted(level_graph);
2438 
2439   /* put together the new matrix */
2440   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2441   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2442   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2443   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2444   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2445   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2446   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448 
2449   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2450   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2451   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2452   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2459 {
2460   PetscFunctionBegin;
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSolve_MPIAIJ"
2466 /*
2467   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2468 */
2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2470 {
2471   namespace graph_dist = boost::graph::distributed;
2472 
2473   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2474   lgraph_type    *lgraph_p;
2475   PetscContainer c;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2480   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2481   ierr = VecCopy(b, x);CHKERRQ(ierr);
2482 
2483   PetscScalar *array_x;
2484   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2485   PetscInt sx;
2486   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2487 
2488   PetscScalar *array_b;
2489   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2490   PetscInt sb;
2491   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2492 
2493   lgraph_type& level_graph = *lgraph_p;
2494   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2495 
2496   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2497   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2498   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2499 
2500   typedef boost::iterator_property_map<array_ref_type::iterator,
2501                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2502   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2503   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2504 
2505   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2506   PetscFunctionReturn(0);
2507 }
2508 #endif
2509 
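/*
   Cached communication data: the ranks this process sends to and receives from,
   the nonzero counts exchanged, and the send/receive buffers for the column
   indices (sbuf_j/rbuf_j) and values (sbuf_a/rbuf_a); Destroy holds the original
   destructor so MatDestroy_MatRedundant() can restore and invoke it.
*/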
2510 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2511   PetscInt    nzlocal,nsends,nrecvs;
2512   PetscMPIInt *send_rank,*recv_rank;
2513   PetscInt    *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2514   PetscScalar *sbuf_a,**rbuf_a;
2515   PetscErrorCode (*Destroy)(Mat);
2516 } Mat_Redundant;
2517 
2518 #undef __FUNCT__
2519 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2520 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2521 {
2522   PetscErrorCode ierr;
2523   Mat_Redundant  *redund=(Mat_Redundant*)ptr;
2524   PetscInt       i;
2525 
2526   PetscFunctionBegin;
2527   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2528   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2529   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2530   for (i=0; i<redund->nrecvs; i++) {
2531     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2532     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2533   }
2534   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2535   ierr = PetscFree(redund);CHKERRQ(ierr);
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 #undef __FUNCT__
2540 #define __FUNCT__ "MatDestroy_MatRedundant"
2541 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2542 {
2543   PetscErrorCode ierr;
2544   PetscContainer container;
2545   Mat_Redundant  *redund=NULL;
2546 
2547   PetscFunctionBegin;
2548   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2549   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2550   ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2551 
2552   A->ops->destroy = redund->Destroy;
2553 
2554   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2555   if (A->ops->destroy) {
2556     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2557   }
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2563 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2564 {
2565   PetscMPIInt    rank,size;
2566   MPI_Comm       comm;
2567   PetscErrorCode ierr;
2568   PetscInt       nsends    = 0,nrecvs=0,i,rownz_max=0;
2569   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL;
2570   PetscInt       *rowrange = mat->rmap->range;
2571   Mat_MPIAIJ     *aij      = (Mat_MPIAIJ*)mat->data;
2572   Mat            A         = aij->A,B=aij->B,C=*matredundant;
2573   Mat_SeqAIJ     *a        = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2574   PetscScalar    *sbuf_a;
2575   PetscInt       nzlocal=a->nz+b->nz;
2576   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2577   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2578   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2579   MatScalar      *aworkA,*aworkB;
2580   PetscScalar    *vals;
2581   PetscMPIInt    tag1,tag2,tag3,imdex;
2582   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2583   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2584   MPI_Status     recv_status,*send_status;
2585   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2586   PetscInt       **rbuf_j=NULL;
2587   PetscScalar    **rbuf_a=NULL;
2588   Mat_Redundant  *redund =NULL;
2589   PetscContainer container;
2590 
2591   PetscFunctionBegin;
2592   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2593   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2594   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2595 
2596   if (reuse == MAT_REUSE_MATRIX) {
2597     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2598     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2599     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2600     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2601     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2602     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2603     ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2604     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2605 
2606     nsends    = redund->nsends;
2607     nrecvs    = redund->nrecvs;
2608     send_rank = redund->send_rank;
2609     recv_rank = redund->recv_rank;
2610     sbuf_nz   = redund->sbuf_nz;
2611     rbuf_nz   = redund->rbuf_nz;
2612     sbuf_j    = redund->sbuf_j;
2613     sbuf_a    = redund->sbuf_a;
2614     rbuf_j    = redund->rbuf_j;
2615     rbuf_a    = redund->rbuf_a;
2616   }
2617 
2618   if (reuse == MAT_INITIAL_MATRIX) {
2619     PetscMPIInt subrank,subsize;
2620     PetscInt    nleftover,np_subcomm;
2621     /* get the destination processors' id send_rank, nsends and nrecvs */
2622     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2623     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2624     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2625 
2626     np_subcomm = size/nsubcomm;
2627     nleftover  = size - nsubcomm*np_subcomm;
2628 
2629     nsends = 0; nrecvs = 0;
2630     for (i=0; i<size; i++) { /* i=rank*/
2631       if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */
2632         send_rank[nsends]   = i; nsends++;
2633         recv_rank[nrecvs++] = i;
2634       }
2635     }
2636     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2637       i = size-nleftover-1;
2638       j = 0;
2639       while (j < nsubcomm - nleftover) {
2640         send_rank[nsends++] = i;
2641         i--; j++;
2642       }
2643     }
2644 
2645     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2646       for (i=0; i<nleftover; i++) {
2647         recv_rank[nrecvs++] = size-nleftover+i;
2648       }
2649     }
2650 
2651     /* allocate sbuf_j, sbuf_a */
2652     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2653     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2654     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2655   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2656 
2657   /* copy mat's local entries into the buffers */
2658   if (reuse == MAT_INITIAL_MATRIX) {
2659     rownz_max = 0;
2660     rptr      = sbuf_j;
2661     cols      = sbuf_j + rend-rstart + 1;
2662     vals      = sbuf_a;
2663     rptr[0]   = 0;
2664     for (i=0; i<rend-rstart; i++) {
2665       row    = i + rstart;
2666       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2667       ncols  = nzA + nzB;
2668       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2669       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2670       /* load the column indices for this row into cols */
2671       lwrite = 0;
2672       for (l=0; l<nzB; l++) {
2673         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2674           vals[lwrite]   = aworkB[l];
2675           cols[lwrite++] = ctmp;
2676         }
2677       }
2678       for (l=0; l<nzA; l++) {
2679         vals[lwrite]   = aworkA[l];
2680         cols[lwrite++] = cstart + cworkA[l];
2681       }
2682       for (l=0; l<nzB; l++) {
2683         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2684           vals[lwrite]   = aworkB[l];
2685           cols[lwrite++] = ctmp;
2686         }
2687       }
2688       vals     += ncols;
2689       cols     += ncols;
2690       rptr[i+1] = rptr[i] + ncols;
2691       if (rownz_max < ncols) rownz_max = ncols;
2692     }
2693     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2694   } else { /* only copy matrix values into sbuf_a */
2695     rptr    = sbuf_j;
2696     vals    = sbuf_a;
2697     rptr[0] = 0;
2698     for (i=0; i<rend-rstart; i++) {
2699       row    = i + rstart;
2700       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2701       ncols  = nzA + nzB;
2702       cworkB = b->j + b->i[i];
2703       aworkA = a->a + a->i[i];
2704       aworkB = b->a + b->i[i];
2705       lwrite = 0;
2706       for (l=0; l<nzB; l++) {
2707         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2708       }
2709       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2710       for (l=0; l<nzB; l++) {
2711         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2712       }
2713       vals     += ncols;
2714       rptr[i+1] = rptr[i] + ncols;
2715     }
2716   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2717 
2718   /* send nzlocal to others, and recv other's nzlocal */
2719   /*--------------------------------------------------*/
2720   if (reuse == MAT_INITIAL_MATRIX) {
2721     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2722 
2723     s_waits2 = s_waits3 + nsends;
2724     s_waits1 = s_waits2 + nsends;
2725     r_waits1 = s_waits1 + nsends;
2726     r_waits2 = r_waits1 + nrecvs;
2727     r_waits3 = r_waits2 + nrecvs;
2728   } else {
2729     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2730 
2731     r_waits3 = s_waits3 + nsends;
2732   }
2733 
2734   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2735   if (reuse == MAT_INITIAL_MATRIX) {
2736     /* get new tags to keep the communication clean */
2737     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2738     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2739     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2740 
2741     /* post receives of other's nzlocal */
2742     for (i=0; i<nrecvs; i++) {
2743       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2744     }
2745     /* send nzlocal to others */
2746     for (i=0; i<nsends; i++) {
2747       sbuf_nz[i] = nzlocal;
2748       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2749     }
2750     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2751     count = nrecvs;
2752     while (count) {
2753       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2754 
2755       recv_rank[imdex] = recv_status.MPI_SOURCE;
2756       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2757       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2758 
2759       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2760 
2761       rbuf_nz[imdex] += i + 2;
2762 
2763       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2764       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2765       count--;
2766     }
2767     /* wait on sends of nzlocal */
2768     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2769     /* send mat->i,j to others, and recv from other's */
2770     /*------------------------------------------------*/
2771     for (i=0; i<nsends; i++) {
2772       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2773       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2774     }
2775     /* wait on receives of mat->i,j */
2776     /*------------------------------*/
2777     count = nrecvs;
2778     while (count) {
2779       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2780       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2781       count--;
2782     }
2783     /* wait on sends of mat->i,j */
2784     /*---------------------------*/
2785     if (nsends) {
2786       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2787     }
2788   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2789 
2790   /* post receives, send and receive mat->a */
2791   /*----------------------------------------*/
2792   for (imdex=0; imdex<nrecvs; imdex++) {
2793     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2794   }
2795   for (i=0; i<nsends; i++) {
2796     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2797   }
2798   count = nrecvs;
2799   while (count) {
2800     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2801     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2802     count--;
2803   }
2804   if (nsends) {
2805     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2806   }
2807 
2808   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2809 
2810   /* create redundant matrix */
2811   /*-------------------------*/
2812   if (reuse == MAT_INITIAL_MATRIX) {
2813     /* compute rownz_max for preallocation */
2814     for (imdex=0; imdex<nrecvs; imdex++) {
2815       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2816       rptr = rbuf_j[imdex];
2817       for (i=0; i<j; i++) {
2818         ncols = rptr[i+1] - rptr[i];
2819         if (rownz_max < ncols) rownz_max = ncols;
2820       }
2821     }
2822 
2823     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2824     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2825     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2826     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2827     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2828     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2829   } else {
2830     C = *matredundant;
2831   }
2832 
2833   /* insert local matrix entries */
2834   rptr = sbuf_j;
2835   cols = sbuf_j + rend-rstart + 1;
2836   vals = sbuf_a;
2837   for (i=0; i<rend-rstart; i++) {
2838     row   = i + rstart;
2839     ncols = rptr[i+1] - rptr[i];
2840     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2841     vals += ncols;
2842     cols += ncols;
2843   }
2844   /* insert received matrix entries */
2845   for (imdex=0; imdex<nrecvs; imdex++) {
2846     rstart = rowrange[recv_rank[imdex]];
2847     rend   = rowrange[recv_rank[imdex]+1];
2848     rptr   = rbuf_j[imdex];
2849     cols   = rbuf_j[imdex] + rend-rstart + 1;
2850     vals   = rbuf_a[imdex];
2851     for (i=0; i<rend-rstart; i++) {
2852       row   = i + rstart;
2853       ncols = rptr[i+1] - rptr[i];
2854       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2855       vals += ncols;
2856       cols += ncols;
2857     }
2858   }
2859   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2860   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2861   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2862   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2863   if (reuse == MAT_INITIAL_MATRIX) {
2864     PetscContainer container;
2865     *matredundant = C;
2866     /* create a supporting struct and attach it to C for reuse */
2867     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2868     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2869     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2870     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2871     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2872     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2873 
2874     redund->nzlocal   = nzlocal;
2875     redund->nsends    = nsends;
2876     redund->nrecvs    = nrecvs;
2877     redund->send_rank = send_rank;
2878     redund->recv_rank = recv_rank;
2879     redund->sbuf_nz   = sbuf_nz;
2880     redund->rbuf_nz   = rbuf_nz;
2881     redund->sbuf_j    = sbuf_j;
2882     redund->sbuf_a    = sbuf_a;
2883     redund->rbuf_j    = rbuf_j;
2884     redund->rbuf_a    = rbuf_a;
2885 
2886     redund->Destroy = C->ops->destroy;
2887     C->ops->destroy = MatDestroy_MatRedundant;
2888   }
2889   PetscFunctionReturn(0);
2890 }
2891 
2892 #undef __FUNCT__
2893 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2894 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2895 {
2896   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2897   PetscErrorCode ierr;
2898   PetscInt       i,*idxb = 0;
2899   PetscScalar    *va,*vb;
2900   Vec            vtmp;
2901 
2902   PetscFunctionBegin;
2903   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2904   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2905   if (idx) {
2906     for (i=0; i<A->rmap->n; i++) {
2907       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2908     }
2909   }
2910 
2911   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2912   if (idx) {
2913     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2914   }
2915   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2916   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2917 
2918   for (i=0; i<A->rmap->n; i++) {
2919     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2920       va[i] = vb[i];
2921       if (idx) idx[i] = a->garray[idxb[i]];
2922     }
2923   }
2924 
2925   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2926   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2927   ierr = PetscFree(idxb);CHKERRQ(ierr);
2928   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2929   PetscFunctionReturn(0);
2930 }
2931 
2932 #undef __FUNCT__
2933 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2934 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2935 {
2936   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2937   PetscErrorCode ierr;
2938   PetscInt       i,*idxb = 0;
2939   PetscScalar    *va,*vb;
2940   Vec            vtmp;
2941 
2942   PetscFunctionBegin;
2943   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2944   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2945   if (idx) {
2946     for (i=0; i<A->cmap->n; i++) {
2947       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2948     }
2949   }
2950 
2951   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2952   if (idx) {
2953     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2954   }
2955   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2956   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2957 
2958   for (i=0; i<A->rmap->n; i++) {
2959     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2960       va[i] = vb[i];
2961       if (idx) idx[i] = a->garray[idxb[i]];
2962     }
2963   }
2964 
2965   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2966   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2967   ierr = PetscFree(idxb);CHKERRQ(ierr);
2968   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2969   PetscFunctionReturn(0);
2970 }
2971 
2972 #undef __FUNCT__
2973 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2974 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2975 {
2976   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2977   PetscInt       n      = A->rmap->n;
2978   PetscInt       cstart = A->cmap->rstart;
2979   PetscInt       *cmap  = mat->garray;
2980   PetscInt       *diagIdx, *offdiagIdx;
2981   Vec            diagV, offdiagV;
2982   PetscScalar    *a, *diagA, *offdiagA;
2983   PetscInt       r;
2984   PetscErrorCode ierr;
2985 
2986   PetscFunctionBegin;
2987   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2988   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2989   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2990   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2991   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2992   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2993   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2994   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2995   for (r = 0; r < n; ++r) {
2996     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2997       a[r]   = diagA[r];
2998       idx[r] = cstart + diagIdx[r];
2999     } else {
3000       a[r]   = offdiagA[r];
3001       idx[r] = cmap[offdiagIdx[r]];
3002     }
3003   }
3004   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3005   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3006   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3007   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3008   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3009   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3010   PetscFunctionReturn(0);
3011 }
3012 
3013 #undef __FUNCT__
3014 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3015 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3016 {
3017   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3018   PetscInt       n      = A->rmap->n;
3019   PetscInt       cstart = A->cmap->rstart;
3020   PetscInt       *cmap  = mat->garray;
3021   PetscInt       *diagIdx, *offdiagIdx;
3022   Vec            diagV, offdiagV;
3023   PetscScalar    *a, *diagA, *offdiagA;
3024   PetscInt       r;
3025   PetscErrorCode ierr;
3026 
3027   PetscFunctionBegin;
3028   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3029   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3030   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3031   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3032   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3033   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3034   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3035   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3036   for (r = 0; r < n; ++r) {
3037     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3038       a[r]   = diagA[r];
3039       idx[r] = cstart + diagIdx[r];
3040     } else {
3041       a[r]   = offdiagA[r];
3042       idx[r] = cmap[offdiagIdx[r]];
3043     }
3044   }
3045   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3046   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3047   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3048   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3049   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3050   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3051   PetscFunctionReturn(0);
3052 }
3053 
3054 #undef __FUNCT__
3055 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3056 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3057 {
3058   PetscErrorCode ierr;
3059   Mat            *dummy;
3060 
3061   PetscFunctionBegin;
3062   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3063   *newmat = *dummy;
3064   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3065   PetscFunctionReturn(0);
3066 }
3067 
3068 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3069 
3070 #undef __FUNCT__
3071 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3072 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3073 {
3074   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3075   PetscErrorCode ierr;
3076 
3077   PetscFunctionBegin;
3078   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3079   PetscFunctionReturn(0);
3080 }
3081 
3082 #undef __FUNCT__
3083 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3084 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3085 {
3086   PetscErrorCode ierr;
3087   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3088 
3089   PetscFunctionBegin;
3090   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3091   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3092   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3093   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3094   PetscFunctionReturn(0);
3095 }
3096 
3097 /* -------------------------------------------------------------------*/
3098 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3099        MatGetRow_MPIAIJ,
3100        MatRestoreRow_MPIAIJ,
3101        MatMult_MPIAIJ,
3102 /* 4*/ MatMultAdd_MPIAIJ,
3103        MatMultTranspose_MPIAIJ,
3104        MatMultTransposeAdd_MPIAIJ,
3105 #if defined(PETSC_HAVE_PBGL)
3106        MatSolve_MPIAIJ,
3107 #else
3108        0,
3109 #endif
3110        0,
3111        0,
3112 /*10*/ 0,
3113        0,
3114        0,
3115        MatSOR_MPIAIJ,
3116        MatTranspose_MPIAIJ,
3117 /*15*/ MatGetInfo_MPIAIJ,
3118        MatEqual_MPIAIJ,
3119        MatGetDiagonal_MPIAIJ,
3120        MatDiagonalScale_MPIAIJ,
3121        MatNorm_MPIAIJ,
3122 /*20*/ MatAssemblyBegin_MPIAIJ,
3123        MatAssemblyEnd_MPIAIJ,
3124        MatSetOption_MPIAIJ,
3125        MatZeroEntries_MPIAIJ,
3126 /*24*/ MatZeroRows_MPIAIJ,
3127        0,
3128 #if defined(PETSC_HAVE_PBGL)
3129        0,
3130 #else
3131        0,
3132 #endif
3133        0,
3134        0,
3135 /*29*/ MatSetUp_MPIAIJ,
3136 #if defined(PETSC_HAVE_PBGL)
3137        0,
3138 #else
3139        0,
3140 #endif
3141        0,
3142        0,
3143        0,
3144 /*34*/ MatDuplicate_MPIAIJ,
3145        0,
3146        0,
3147        0,
3148        0,
3149 /*39*/ MatAXPY_MPIAIJ,
3150        MatGetSubMatrices_MPIAIJ,
3151        MatIncreaseOverlap_MPIAIJ,
3152        MatGetValues_MPIAIJ,
3153        MatCopy_MPIAIJ,
3154 /*44*/ MatGetRowMax_MPIAIJ,
3155        MatScale_MPIAIJ,
3156        0,
3157        0,
3158        MatZeroRowsColumns_MPIAIJ,
3159 /*49*/ MatSetRandom_MPIAIJ,
3160        0,
3161        0,
3162        0,
3163        0,
3164 /*54*/ MatFDColoringCreate_MPIAIJ,
3165        0,
3166        MatSetUnfactored_MPIAIJ,
3167        MatPermute_MPIAIJ,
3168        0,
3169 /*59*/ MatGetSubMatrix_MPIAIJ,
3170        MatDestroy_MPIAIJ,
3171        MatView_MPIAIJ,
3172        0,
3173        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3174 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3175        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3176        0,
3177        0,
3178        0,
3179 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3180        MatGetRowMinAbs_MPIAIJ,
3181        0,
3182        MatSetColoring_MPIAIJ,
3183        0,
3184        MatSetValuesAdifor_MPIAIJ,
3185 /*75*/ MatFDColoringApply_AIJ,
3186        0,
3187        0,
3188        0,
3189        MatFindZeroDiagonals_MPIAIJ,
3190 /*80*/ 0,
3191        0,
3192        0,
3193 /*83*/ MatLoad_MPIAIJ,
3194        0,
3195        0,
3196        0,
3197        0,
3198        0,
3199 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3200        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3201        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3202        MatPtAP_MPIAIJ_MPIAIJ,
3203        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3204 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3205        0,
3206        0,
3207        0,
3208        0,
3209 /*99*/ 0,
3210        0,
3211        0,
3212        MatConjugate_MPIAIJ,
3213        0,
3214 /*104*/MatSetValuesRow_MPIAIJ,
3215        MatRealPart_MPIAIJ,
3216        MatImaginaryPart_MPIAIJ,
3217        0,
3218        0,
3219 /*109*/0,
3220        MatGetRedundantMatrix_MPIAIJ,
3221        MatGetRowMin_MPIAIJ,
3222        0,
3223        0,
3224 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3225        0,
3226        0,
3227        0,
3228        0,
3229 /*119*/0,
3230        0,
3231        0,
3232        0,
3233        MatGetMultiProcBlock_MPIAIJ,
3234 /*124*/MatFindNonzeroRows_MPIAIJ,
3235        MatGetColumnNorms_MPIAIJ,
3236        MatInvertBlockDiagonal_MPIAIJ,
3237        0,
3238        MatGetSubMatricesParallel_MPIAIJ,
3239 /*129*/0,
3240        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3241        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3242        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3243        0,
3244 /*134*/0,
3245        0,
3246        0,
3247        0,
3248        0
3249 };
3250 
3251 /* ----------------------------------------------------------------------------------------*/
3252 
3253 EXTERN_C_BEGIN
3254 #undef __FUNCT__
3255 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3256 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3257 {
3258   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3259   PetscErrorCode ierr;
3260 
3261   PetscFunctionBegin;
3262   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3263   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3264   PetscFunctionReturn(0);
3265 }
3266 EXTERN_C_END
3267 
3268 EXTERN_C_BEGIN
3269 #undef __FUNCT__
3270 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3271 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3272 {
3273   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3274   PetscErrorCode ierr;
3275 
3276   PetscFunctionBegin;
3277   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3278   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3279   PetscFunctionReturn(0);
3280 }
3281 EXTERN_C_END
3282 
3283 EXTERN_C_BEGIN
3284 #undef __FUNCT__
3285 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3286 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3287 {
3288   Mat_MPIAIJ     *b;
3289   PetscErrorCode ierr;
3290   PetscInt       i;
3291   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3292 
3293   PetscFunctionBegin;
3294   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3295   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3296   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3297   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3298   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3299   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3300 
3301   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3302   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3303   if (d_nnz) {
3304     for (i=0; i<B->rmap->n; i++) {
3305       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3306     }
3307   }
3308   if (o_nnz) {
3309     for (i=0; i<B->rmap->n; i++) {
3310       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3311     }
3312   }
3313   b = (Mat_MPIAIJ*)B->data;
3314 
3315   if (!B->preallocated) {
3316     /* Explicitly create 2 MATSEQAIJ matrices. */
3317     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3318     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3319     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3320     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3321     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3322     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3323     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3324     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3325     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3326     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3327   }
3328 
3329   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3330   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3331   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3332   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3333   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3334   B->preallocated = PETSC_TRUE;
3335   PetscFunctionReturn(0);
3336 }
3337 EXTERN_C_END
3338 
3339 #undef __FUNCT__
3340 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3341 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3342 {
3343   Mat            mat;
3344   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3345   PetscErrorCode ierr;
3346 
3347   PetscFunctionBegin;
3348   *newmat = 0;
3349   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3350   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3351   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3352   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3353   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3354   a       = (Mat_MPIAIJ*)mat->data;
3355 
3356   mat->factortype   = matin->factortype;
3357   mat->rmap->bs     = matin->rmap->bs;
3358   mat->cmap->bs     = matin->cmap->bs;
3359   mat->assembled    = PETSC_TRUE;
3360   mat->insertmode   = NOT_SET_VALUES;
3361   mat->preallocated = PETSC_TRUE;
3362 
3363   a->size         = oldmat->size;
3364   a->rank         = oldmat->rank;
3365   a->donotstash   = oldmat->donotstash;
3366   a->roworiented  = oldmat->roworiented;
3367   a->rowindices   = 0;
3368   a->rowvalues    = 0;
3369   a->getrowactive = PETSC_FALSE;
3370 
3371   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3372   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3373 
3374   if (oldmat->colmap) {
3375 #if defined(PETSC_USE_CTABLE)
3376     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3377 #else
3378     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3379     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3380     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3381 #endif
3382   } else a->colmap = 0;
3383   if (oldmat->garray) {
3384     PetscInt len;
3385     len  = oldmat->B->cmap->n;
3386     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3387     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3388     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3389   } else a->garray = 0;
3390 
3391   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3392   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3393   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3394   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3395   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3396   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3397   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3398   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3399   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3400   *newmat = mat;
3401   PetscFunctionReturn(0);
3402 }
3403 
3404 
3405 
3406 #undef __FUNCT__
3407 #define __FUNCT__ "MatLoad_MPIAIJ"
3408 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3409 {
3410   PetscScalar    *vals,*svals;
3411   MPI_Comm       comm;
3412   PetscErrorCode ierr;
3413   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3414   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3415   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3416   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3417   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3418   int            fd;
3419   PetscInt       bs = 1;
3420 
3421   PetscFunctionBegin;
3422   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3423   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3424   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3425   if (!rank) {
3426     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3427     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3428     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3429   }
3430 
3431   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3432   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3433   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3434 
3435   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3436 
3437   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3438   M    = header[1]; N = header[2];
3439   /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
3440   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3441   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3442 
3443   /* If global sizes are set, check if they are consistent with that given in the file */
3444   if (sizesset) {
3445     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3446   }
3447   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3448   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3449 
3450   /* determine ownership of all (block) rows */
3451   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3452   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3453   else m = newMat->rmap->n; /* Set by user */
3454 
3455   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3456   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3457 
3458   /* First process needs enough room for process with most rows */
3459   if (!rank) {
3460     mmax = rowners[1];
3461     for (i=2; i<=size; i++) {
3462       mmax = PetscMax(mmax, rowners[i]);
3463     }
3464   } else mmax = m;
3465 
3466   rowners[0] = 0;
3467   for (i=2; i<=size; i++) {
3468     rowners[i] += rowners[i-1];
3469   }
3470   rstart = rowners[rank];
3471   rend   = rowners[rank+1];
3472 
3473   /* distribute row lengths to all processors */
3474   ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3475   if (!rank) {
3476     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3477     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3478     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3479     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3480     for (j=0; j<m; j++) {
3481       procsnz[0] += ourlens[j];
3482     }
3483     for (i=1; i<size; i++) {
3484       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3485       /* calculate the number of nonzeros on each processor */
3486       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3487         procsnz[i] += rowlengths[j];
3488       }
3489       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3490     }
3491     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3492   } else {
3493     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3494   }
3495 
3496   if (!rank) {
3497     /* determine max buffer needed and allocate it */
3498     maxnz = 0;
3499     for (i=0; i<size; i++) {
3500       maxnz = PetscMax(maxnz,procsnz[i]);
3501     }
3502     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3503 
3504     /* read in my part of the matrix column indices  */
3505     nz   = procsnz[0];
3506     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3507     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3508 
3509     /* read in everyone else's and ship off */
3510     for (i=1; i<size; i++) {
3511       nz   = procsnz[i];
3512       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3513       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3514     }
3515     ierr = PetscFree(cols);CHKERRQ(ierr);
3516   } else {
3517     /* determine buffer space needed for message */
3518     nz = 0;
3519     for (i=0; i<m; i++) {
3520       nz += ourlens[i];
3521     }
3522     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3523 
3524     /* receive message of column indices*/
3525     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3526   }
3527 
3528   /* determine column ownership if matrix is not square */
3529   if (N != M) {
3530     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3531     else n = newMat->cmap->n;
3532     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3533     cstart = cend - n;
3534   } else {
3535     cstart = rstart;
3536     cend   = rend;
3537     n      = cend - cstart;
3538   }
3539 
3540   /* loop over local rows, determining number of off diagonal entries */
3541   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3542   jj   = 0;
3543   for (i=0; i<m; i++) {
3544     for (j=0; j<ourlens[i]; j++) {
3545       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3546       jj++;
3547     }
3548   }
3549 
3550   for (i=0; i<m; i++) {
3551     ourlens[i] -= offlens[i];
3552   }
3553   if (!sizesset) {
3554     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3555   }
3556 
3557   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3558 
3559   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3560 
3561   for (i=0; i<m; i++) {
3562     ourlens[i] += offlens[i];
3563   }
3564 
3565   if (!rank) {
3566     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3567 
3568     /* read in my part of the matrix numerical values  */
3569     nz   = procsnz[0];
3570     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3571 
3572     /* insert into matrix */
3573     jj      = rstart;
3574     smycols = mycols;
3575     svals   = vals;
3576     for (i=0; i<m; i++) {
3577       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3578       smycols += ourlens[i];
3579       svals   += ourlens[i];
3580       jj++;
3581     }
3582 
3583     /* read in other processors and ship out */
3584     for (i=1; i<size; i++) {
3585       nz   = procsnz[i];
3586       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3587       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3588     }
3589     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3590   } else {
3591     /* receive numeric values */
3592     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3593 
3594     /* receive message of values*/
3595     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3596 
3597     /* insert into matrix */
3598     jj      = rstart;
3599     smycols = mycols;
3600     svals   = vals;
3601     for (i=0; i<m; i++) {
3602       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3603       smycols += ourlens[i];
3604       svals   += ourlens[i];
3605       jj++;
3606     }
3607   }
3608   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3609   ierr = PetscFree(vals);CHKERRQ(ierr);
3610   ierr = PetscFree(mycols);CHKERRQ(ierr);
3611   ierr = PetscFree(rowners);CHKERRQ(ierr);
3612   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3613   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3614   PetscFunctionReturn(0);
3615 }
3616 
3617 #undef __FUNCT__
3618 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3619 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3620 {
3621   PetscErrorCode ierr;
3622   IS             iscol_local;
3623   PetscInt       csize;
3624 
3625   PetscFunctionBegin;
3626   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3627   if (call == MAT_REUSE_MATRIX) {
3628     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3629     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3630   } else {
3631     PetscInt cbs;
3632     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3633     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3634     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3635   }
3636   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3637   if (call == MAT_INITIAL_MATRIX) {
3638     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3639     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3640   }
3641   PetscFunctionReturn(0);
3642 }
3643 
3644 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3645 #undef __FUNCT__
3646 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3647 /*
3648     Not great since it makes two copies of the submatrix: first a SeqAIJ
3649   on each process, and then the final result obtained by concatenating the local matrices.
3650   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3651 
3652   Note: This requires a sequential iscol with all indices.
3653 */
3654 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3655 {
3656   PetscErrorCode ierr;
3657   PetscMPIInt    rank,size;
3658   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3659   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3660   PetscBool      allcolumns, colflag;
3661   Mat            M,Mreuse;
3662   MatScalar      *vwork,*aa;
3663   MPI_Comm       comm;
3664   Mat_SeqAIJ     *aij;
3665 
3666   PetscFunctionBegin;
3667   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3668   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3669   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3670 
3671   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3672   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3673   if (colflag && ncol == mat->cmap->N) {
3674     allcolumns = PETSC_TRUE;
3675   } else {
3676     allcolumns = PETSC_FALSE;
3677   }
3678   if (call ==  MAT_REUSE_MATRIX) {
3679     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3680     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3681     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3682   } else {
3683     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3684   }
3685 
3686   /*
3687       m - number of local rows
3688       n - number of columns (same on all processors)
3689       rstart - first row in new global matrix generated
3690   */
3691   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3692   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3693   if (call == MAT_INITIAL_MATRIX) {
3694     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3695     ii  = aij->i;
3696     jj  = aij->j;
3697 
3698     /*
3699         Determine the number of non-zeros in the diagonal and off-diagonal
3700         portions of the matrix in order to do correct preallocation
3701     */
3702 
3703     /* first get start and end of "diagonal" columns */
3704     if (csize == PETSC_DECIDE) {
3705       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3706       if (mglobal == n) { /* square matrix */
3707         nlocal = m;
3708       } else {
3709         nlocal = n/size + ((n % size) > rank);
3710       }
3711     } else {
3712       nlocal = csize;
3713     }
3714     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3715     rstart = rend - nlocal;
3716     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3717 
3718     /* next, compute all the lengths */
3719     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3720     olens = dlens + m;
3721     for (i=0; i<m; i++) {
3722       jend = ii[i+1] - ii[i];
3723       olen = 0;
3724       dlen = 0;
3725       for (j=0; j<jend; j++) {
3726         if (*jj < rstart || *jj >= rend) olen++;
3727         else dlen++;
3728         jj++;
3729       }
3730       olens[i] = olen;
3731       dlens[i] = dlen;
3732     }
3733     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3734     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3735     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3736     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3737     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3738     ierr = PetscFree(dlens);CHKERRQ(ierr);
3739   } else {
3740     PetscInt ml,nl;
3741 
3742     M    = *newmat;
3743     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3744     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3745     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3746     /*
3747          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3748        rather than the slower MatSetValues().
3749     */
3750     M->was_assembled = PETSC_TRUE;
3751     M->assembled     = PETSC_FALSE;
3752   }
3753   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3754   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3755   ii   = aij->i;
3756   jj   = aij->j;
3757   aa   = aij->a;
3758   for (i=0; i<m; i++) {
3759     row   = rstart + i;
3760     nz    = ii[i+1] - ii[i];
3761     cwork = jj;     jj += nz;
3762     vwork = aa;     aa += nz;
3763     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3764   }
3765 
3766   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3767   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3768   *newmat = M;
3769 
3770   /* save submatrix used in processor for next request */
3771   if (call ==  MAT_INITIAL_MATRIX) {
3772     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3773     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3774   }
3775   PetscFunctionReturn(0);
3776 }
3777 
3778 EXTERN_C_BEGIN
3779 #undef __FUNCT__
3780 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3781 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3782 {
3783   PetscInt       m,cstart, cend,j,nnz,i,d;
3784   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3785   const PetscInt *JJ;
3786   PetscScalar    *values;
3787   PetscErrorCode ierr;
3788 
3789   PetscFunctionBegin;
3790   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3791 
3792   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3793   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3794   m      = B->rmap->n;
3795   cstart = B->cmap->rstart;
3796   cend   = B->cmap->rend;
3797   rstart = B->rmap->rstart;
3798 
3799   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3800 
3801 #if defined(PETSC_USE_DEBUG)
3802   for (i=0; i<m; i++) {
3803     nnz = Ii[i+1]- Ii[i];
3804     JJ  = J + Ii[i];
3805     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3806     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3807     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3808   }
3809 #endif
3810 
3811   for (i=0; i<m; i++) {
3812     nnz     = Ii[i+1]- Ii[i];
3813     JJ      = J + Ii[i];
3814     nnz_max = PetscMax(nnz_max,nnz);
3815     d       = 0;
3816     for (j=0; j<nnz; j++) {
3817       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3818     }
3819     d_nnz[i] = d;
3820     o_nnz[i] = nnz - d;
3821   }
3822   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3823   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3824 
3825   if (v) values = (PetscScalar*)v;
3826   else {
3827     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3828     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3829   }
3830 
3831   for (i=0; i<m; i++) {
3832     ii   = i + rstart;
3833     nnz  = Ii[i+1]- Ii[i];
3834     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3835   }
3836   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3837   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3838 
3839   if (!v) {
3840     ierr = PetscFree(values);CHKERRQ(ierr);
3841   }
3842   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3843   PetscFunctionReturn(0);
3844 }
3845 EXTERN_C_END
3846 
3847 #undef __FUNCT__
3848 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3849 /*@
3850    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3851    (the default parallel PETSc format).
3852 
3853    Collective on MPI_Comm
3854 
3855    Input Parameters:
3856 +  B - the matrix
3857 .  i - the indices into j for the start of each local row (starts with zero)
3858 .  j - the column indices for each local row (starts with zero)
3859 -  v - optional values in the matrix
3860 
3861    Level: developer
3862 
3863    Notes:
3864        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3865      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3866      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3867 
3868        The i and j indices are 0-based, and the values in i are offsets into the local j array.
3869 
3870        The format used for the sparse matrix input is equivalent to a
3871     row-major ordering, i.e., for the following matrix the input data expected is
3872     as shown:
3873 
3874         1 0 0
3875         2 0 3     P0
3876        -------
3877         4 5 6     P1
3878 
3879      Process0 [P0]: rows_owned=[0,1]
3880         i =  {0,1,3}  [size = nrow+1  = 2+1]
3881         j =  {0,0,2}  [size = nz = 3]
3882         v =  {1,2,3}  [size = nz = 3]
3883 
3884      Process1 [P1]: rows_owned=[2]
3885         i =  {0,3}    [size = nrow+1  = 1+1]
3886         j =  {0,1,2}  [size = nz = 3]
3887         v =  {4,5,6}  [size = nz = 3]
3888 
3889 .keywords: matrix, aij, compressed row, sparse, parallel
3890 
3891 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3892           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3893 @*/
3894 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3895 {
3896   PetscErrorCode ierr;
3897 
3898   PetscFunctionBegin;
3899   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3900   PetscFunctionReturn(0);
3901 }
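
/*
   A minimal usage sketch for MatMPIAIJSetPreallocationCSR(), assuming a communicator with exactly
   two processes; it builds the 3x3 example from the manual page above (process 0 owns rows 0-1,
   process 1 owns row 2).  The helper MatMPIAIJExampleCSR_Sketch() is hypothetical and not part of
   the PETSc API; it only illustrates how the local CSR arrays are handed to the routine, so it is
   kept out of the build with #if 0.
*/
#if 0
static PetscErrorCode MatMPIAIJExampleCSR_Sketch(MPI_Comm comm,Mat *C)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i0[] = {0,1,3},i1[] = {0,3};              /* local row pointers for rank 0 and rank 1 */
  PetscInt       j0[] = {0,0,2},j1[] = {0,1,2};            /* local column indices (global, 0-based) */
  PetscScalar    v0[] = {1.0,2.0,3.0},v1[] = {4.0,5.0,6.0};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,C);CHKERRQ(ierr);
  ierr = MatSetSizes(*C,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);   /* 2 local rows on rank 0, 1 on rank 1 */
  ierr = MatSetType(*C,MATMPIAIJ);CHKERRQ(ierr);
  /* copies the local CSR arrays, preallocates, and assembles the matrix */
  ierr = MatMPIAIJSetPreallocationCSR(*C,rank ? i1 : i0,rank ? j1 : j0,rank ? v1 : v0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif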
3902 
3903 #undef __FUNCT__
3904 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3905 /*@C
3906    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3907    (the default parallel PETSc format).  For good matrix assembly performance
3908    the user should preallocate the matrix storage by setting the parameters
3909    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3910    performance can be increased by more than a factor of 50.
3911 
3912    Collective on MPI_Comm
3913 
3914    Input Parameters:
3915 +  A - the matrix
3916 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3917            (same value is used for all local rows)
3918 .  d_nnz - array containing the number of nonzeros in the various rows of the
3919            DIAGONAL portion of the local submatrix (possibly different for each row)
3920            or NULL, if d_nz is used to specify the nonzero structure.
3921            The size of this array is equal to the number of local rows, i.e 'm'.
3922            For matrices that will be factored, you must leave room for (and set)
3923            the diagonal entry even if it is zero.
3924 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3925            submatrix (same value is used for all local rows).
3926 -  o_nnz - array containing the number of nonzeros in the various rows of the
3927            OFF-DIAGONAL portion of the local submatrix (possibly different for
3928            each row) or NULL, if o_nz is used to specify the nonzero
3929            structure. The size of this array is equal to the number
3930            of local rows, i.e 'm'.
3931 
3932    If the *_nnz parameter is given then the *_nz parameter is ignored
3933 
3934    The AIJ format (also called the Yale sparse matrix format or
3935    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3936    storage.  The stored row and column indices begin with zero.
3937    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3938 
3939    The parallel matrix is partitioned such that the first m0 rows belong to
3940    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3941    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3942 
3943    The DIAGONAL portion of the local submatrix of a processor can be defined
3944    as the submatrix obtained by extracting the part corresponding to
3945    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3946    first row that belongs to the processor, r2 is the last row belonging to
3947    this processor, and c1-c2 is the range of indices of the local part of a
3948    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3949    common case of a square matrix, the row and column ranges are the same and
3950    the DIAGONAL part is also square. The remaining m x (N-n) portion of the local
3951    submatrix constitutes the OFF-DIAGONAL portion.
3952 
3953    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3954 
3955    You can call MatGetInfo() to get information on how effective the preallocation was;
3956    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3957    You can also run with the option -info and look for messages containing the string
3958    malloc to see if additional memory allocation was needed.
3959 
3960    Example usage:
3961 
3962    Consider the following 8x8 matrix with 34 non-zero values, assembled
3963    across 3 processors. Let's assume that proc0 owns 3 rows, proc1 owns
3964    3 rows, and proc2 owns 2 rows. This division can be shown
3965    as follows:
3966 
3967 .vb
3968             1  2  0  |  0  3  0  |  0  4
3969     Proc0   0  5  6  |  7  0  0  |  8  0
3970             9  0 10  | 11  0  0  | 12  0
3971     -------------------------------------
3972            13  0 14  | 15 16 17  |  0  0
3973     Proc1   0 18  0  | 19 20 21  |  0  0
3974             0  0  0  | 22 23  0  | 24  0
3975     -------------------------------------
3976     Proc2  25 26 27  |  0  0 28  | 29  0
3977            30  0  0  | 31 32 33  |  0 34
3978 .ve
3979 
3980    This can be represented as a collection of submatrices as:
3981 
3982 .vb
3983       A B C
3984       D E F
3985       G H I
3986 .ve
3987 
3988    Where the submatrices A,B,C are owned by proc0, D,E,F are
3989    owned by proc1, and G,H,I are owned by proc2.
3990 
3991    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3992    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3993    The 'M','N' parameters are 8,8, and have the same values on all procs.
3994 
3995    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3996    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3997    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3998    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3999    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4000    matrix, and [DF] as another SeqAIJ matrix.
4001 
4002    When d_nz, o_nz parameters are specified, d_nz storage elements are
4003    allocated for every row of the local diagonal submatrix, and o_nz
4004    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4005    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4006    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4007    In this case, the values of d_nz,o_nz are:
4008 .vb
4009      proc0 : d_nz = 2, o_nz = 2
4010      proc1 : d_nz = 3, o_nz = 2
4011      proc2 : d_nz = 1, o_nz = 4
4012 .ve
4013    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4014    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4015    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4016    34 values.
4017 
4018    When d_nnz, o_nnz parameters are specified, the storage is specified
4019    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4020    In the above case, the values for d_nnz,o_nnz are:
4021 .vb
4022      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4023      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4024      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4025 .ve
4026    Here the space allocated is the sum of all the above values, i.e., 34, and
4027    hence the preallocation is perfect.
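
   For instance, on proc0 in the example above, a minimal sketch of the preallocation call using
   the per-row arrays (the variable names here are only illustrative) could be:

.vb
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve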
4028 
4029    Level: intermediate
4030 
4031 .keywords: matrix, aij, compressed row, sparse, parallel
4032 
4033 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4034           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4035 @*/
4036 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4037 {
4038   PetscErrorCode ierr;
4039 
4040   PetscFunctionBegin;
4041   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4042   PetscValidType(B,1);
4043   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4044   PetscFunctionReturn(0);
4045 }
4046 
4047 #undef __FUNCT__
4048 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4049 /*@
4050      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4051          CSR format the local rows.
4052 
4053    Collective on MPI_Comm
4054 
4055    Input Parameters:
4056 +  comm - MPI communicator
4057 .  m - number of local rows (Cannot be PETSC_DECIDE)
4058 .  n - This value should be the same as the local size used in creating the
4059        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4060        it calculated if N is given). For square matrices n is almost always m.
4061 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4062 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4063 .   i - row indices
4064 .   j - column indices
4065 -   a - matrix values
4066 
4067    Output Parameter:
4068 .   mat - the matrix
4069 
4070    Level: intermediate
4071 
4072    Notes:
4073        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4074      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4075      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4076 
4077        The i and j indices are 0 based, and the i array contains offsets into the local j (and a) arrays.
4078 
4079        The format used for the sparse matrix input is equivalent to a
4080     row-major ordering, i.e. for the following matrix, the input data expected is
4081     as shown:
4082 
4083         1 0 0
4084         2 0 3     P0
4085        -------
4086         4 5 6     P1
4087 
4088      Process0 [P0]: rows_owned=[0,1]
4089         i =  {0,1,3}  [size = nrow+1  = 2+1]
4090         j =  {0,0,2}  [size = nz = 3]
4091         v =  {1,2,3}  [size = nz = 3]
4092 
4093      Process1 [P1]: rows_owned=[2]
4094         i =  {0,3}    [size = nrow+1  = 1+1]
4095         j =  {0,1,2}  [size = nz = 3]
4096         v =  {4,5,6}  [size = nz = 3]
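
     A minimal sketch of the corresponding call on each process (where nrow is the number of
     locally owned rows and i, j, a hold the local CSR data listed above) could be:

.vb
     MatCreateMPIAIJWithArrays(comm,nrow,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&mat);
.ve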
4097 
4098 .keywords: matrix, aij, compressed row, sparse, parallel
4099 
4100 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4101           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4102 @*/
4103 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4104 {
4105   PetscErrorCode ierr;
4106 
4107   PetscFunctionBegin;
4108   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4109   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4110   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4111   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4112   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4113   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4114   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4115   PetscFunctionReturn(0);
4116 }
4117 
4118 #undef __FUNCT__
4119 #define __FUNCT__ "MatCreateAIJ"
4120 /*@C
4121    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4122    (the default parallel PETSc format).  For good matrix assembly performance
4123    the user should preallocate the matrix storage by setting the parameters
4124    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4125    performance can be increased by more than a factor of 50.
4126 
4127    Collective on MPI_Comm
4128 
4129    Input Parameters:
4130 +  comm - MPI communicator
4131 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4132            This value should be the same as the local size used in creating the
4133            y vector for the matrix-vector product y = Ax.
4134 .  n - This value should be the same as the local size used in creating the
4135        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4136        it calculated if N is given). For square matrices n is almost always m.
4137 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4138 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4139 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4140            (same value is used for all local rows)
4141 .  d_nnz - array containing the number of nonzeros in the various rows of the
4142            DIAGONAL portion of the local submatrix (possibly different for each row)
4143            or NULL, if d_nz is used to specify the nonzero structure.
4144            The size of this array is equal to the number of local rows, i.e., 'm'.
4145 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4146            submatrix (same value is used for all local rows).
4147 -  o_nnz - array containing the number of nonzeros in the various rows of the
4148            OFF-DIAGONAL portion of the local submatrix (possibly different for
4149            each row) or NULL, if o_nz is used to specify the nonzero
4150            structure. The size of this array is equal to the number
4151            of local rows, i.e., 'm'.
4152 
4153    Output Parameter:
4154 .  A - the matrix
4155 
4156    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4157    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4158    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4159 
4160    Notes:
4161    If the *_nnz parameter is given then the *_nz parameter is ignored
4162    If the *_nnz parameter is given, then the *_nz parameter is ignored.
4163    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4164    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4165    storage requirements for this matrix.
4166 
4167    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4168    processor, then it must be used on all processors that share the object for
4169    that argument.
4170 
4171    The user MUST specify either the local or global matrix dimensions
4172    (possibly both).
4173 
4174    The parallel matrix is partitioned across processors such that the
4175    first m0 rows belong to process 0, the next m1 rows belong to
4176    process 1, the next m2 rows belong to process 2, etc., where
4177    m0,m1,m2,... are given by the input parameter 'm', i.e. each processor stores
4178    values corresponding to an [m x N] submatrix.
4179 
4180    The columns are logically partitioned with the n0 columns belonging
4181    to the 0th partition, the next n1 columns belonging to the next
4182    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
4183 
4184    The DIAGONAL portion of the local submatrix on any given processor
4185    is the submatrix corresponding to the rows and columns m,n
4186    owned by that processor, i.e. the diagonal submatrix on
4187    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4188    etc. The remaining portion of the local submatrix, [m x (N-n)],
4189    constitutes the OFF-DIAGONAL portion. The example below
4190    illustrates this concept.
4191 
4192    For a square global matrix we define each processor's diagonal portion
4193    to be its local rows and the corresponding columns (a square submatrix);
4194    each processor's off-diagonal portion encompasses the remainder of the
4195    local matrix (a rectangular submatrix).
4196 
4197    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4198 
4199    When calling this routine with a single process communicator, a matrix of
4200    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4201    type of communicator, use the construction mechanism:
4202      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4203 
4204    By default, this format uses inodes (identical nodes) when possible.
4205    We search for consecutive rows with the same nonzero structure, thereby
4206    reusing matrix information to achieve increased efficiency.
4207 
4208    Options Database Keys:
4209 +  -mat_no_inode  - Do not use inodes
4210 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4211 -  -mat_aij_oneindex - Internally use indexing starting at 1
4212         rather than 0.  Note that when calling MatSetValues(),
4213         the user still MUST index entries starting at 0!
4214 
4215 
4216    Example usage:
4217 
4218    Consider the following 8x8 matrix with 34 non-zero values, assembled
4219    across 3 processors. Let's assume that proc0 owns 3 rows, proc1 owns
4220    3 rows, and proc2 owns 2 rows. This division can be shown
4221    as follows:
4222 
4223 .vb
4224             1  2  0  |  0  3  0  |  0  4
4225     Proc0   0  5  6  |  7  0  0  |  8  0
4226             9  0 10  | 11  0  0  | 12  0
4227     -------------------------------------
4228            13  0 14  | 15 16 17  |  0  0
4229     Proc1   0 18  0  | 19 20 21  |  0  0
4230             0  0  0  | 22 23  0  | 24  0
4231     -------------------------------------
4232     Proc2  25 26 27  |  0  0 28  | 29  0
4233            30  0  0  | 31 32 33  |  0 34
4234 .ve
4235 
4236    This can be represented as a collection of submatrices as:
4237 
4238 .vb
4239       A B C
4240       D E F
4241       G H I
4242 .ve
4243 
4244    Where the submatrices A,B,C are owned by proc0, D,E,F are
4245    owned by proc1, and G,H,I are owned by proc2.
4246 
4247    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4248    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4249    The 'M','N' parameters are 8,8, and have the same values on all procs.
4250 
4251    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4252    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4253    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4254    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4255    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4256    matrix, and [DF] as another SeqAIJ matrix.
4257 
4258    When d_nz, o_nz parameters are specified, d_nz storage elements are
4259    allocated for every row of the local diagonal submatrix, and o_nz
4260    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4261    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4262    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4263    In this case, the values of d_nz,o_nz are:
4264 .vb
4265      proc0 : d_nz = 2, o_nz = 2
4266      proc1 : d_nz = 3, o_nz = 2
4267      proc2 : d_nz = 1, o_nz = 4
4268 .ve
4269    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4270    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4271    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4272    34 values.
4273 
4274    When d_nnz, o_nnz parameters are specified, the storage is specified
4275    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4276    In the above case, the values for d_nnz,o_nnz are:
4277 .vb
4278      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4279      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4280      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4281 .ve
4282    Here the space allocated is the sum of all the above values, i.e., 34, and
4283    hence the preallocation is perfect.
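
   For instance, a minimal sketch of the collective call, shown with proc0's local sizes from the
   example above and the single-value d_nz/o_nz form, could be:

.vb
     MatCreateAIJ(comm,3,3,8,8,2,NULL,2,NULL,&A);
.ve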
4284 
4285    Level: intermediate
4286 
4287 .keywords: matrix, aij, compressed row, sparse, parallel
4288 
4289 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4290           MPIAIJ, MatCreateMPIAIJWithArrays()
4291 @*/
4292 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4293 {
4294   PetscErrorCode ierr;
4295   PetscMPIInt    size;
4296 
4297   PetscFunctionBegin;
4298   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4299   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4300   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4301   if (size > 1) {
4302     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4303     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4304   } else {
4305     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4306     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4307   }
4308   PetscFunctionReturn(0);
4309 }
4310 
4311 #undef __FUNCT__
4312 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4313 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4314 {
4315   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4316 
4317   PetscFunctionBegin;
4318   *Ad     = a->A;
4319   *Ao     = a->B;
4320   *colmap = a->garray;
4321   PetscFunctionReturn(0);
4322 }
4323 
4324 #undef __FUNCT__
4325 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4326 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4327 {
4328   PetscErrorCode ierr;
4329   PetscInt       i;
4330   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4331 
4332   PetscFunctionBegin;
4333   if (coloring->ctype == IS_COLORING_GLOBAL) {
4334     ISColoringValue *allcolors,*colors;
4335     ISColoring      ocoloring;
4336 
4337     /* set coloring for diagonal portion */
4338     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4339 
4340     /* set coloring for off-diagonal portion */
4341     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4342     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4343     for (i=0; i<a->B->cmap->n; i++) {
4344       colors[i] = allcolors[a->garray[i]];
4345     }
4346     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4347     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4348     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4349     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4350   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4351     ISColoringValue *colors;
4352     PetscInt        *larray;
4353     ISColoring      ocoloring;
4354 
4355     /* set coloring for diagonal portion */
4356     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4357     for (i=0; i<a->A->cmap->n; i++) {
4358       larray[i] = i + A->cmap->rstart;
4359     }
4360     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4361     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4362     for (i=0; i<a->A->cmap->n; i++) {
4363       colors[i] = coloring->colors[larray[i]];
4364     }
4365     ierr = PetscFree(larray);CHKERRQ(ierr);
4366     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4367     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4368     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4369 
4370     /* set coloring for off-diagonal portion */
4371     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4372     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4373     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4374     for (i=0; i<a->B->cmap->n; i++) {
4375       colors[i] = coloring->colors[larray[i]];
4376     }
4377     ierr = PetscFree(larray);CHKERRQ(ierr);
4378     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4379     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4380     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4381   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4382   PetscFunctionReturn(0);
4383 }
4384 
4385 #undef __FUNCT__
4386 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4387 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4388 {
4389   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4390   PetscErrorCode ierr;
4391 
4392   PetscFunctionBegin;
4393   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4394   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4395   PetscFunctionReturn(0);
4396 }
4397 
4398 #undef __FUNCT__
4399 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4400 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4401 {
4402   PetscErrorCode ierr;
4403   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4404   PetscInt       *indx;
4405 
4406   PetscFunctionBegin;
4407   /* This routine will ONLY return MPIAIJ type matrix */
4408   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4409   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4410   if (n == PETSC_DECIDE) {
4411     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4412   }
4413   /* Check sum(n) = N */
4414   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4415   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4416 
4417   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4418   rstart -= m;
4419 
4420   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4421   for (i=0; i<m; i++) {
4422     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4423     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4424     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4425   }
4426 
4427   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4428   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4429   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4430   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4431   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4432   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4433   PetscFunctionReturn(0);
4434 }
4435 
4436 #undef __FUNCT__
4437 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4438 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4439 {
4440   PetscErrorCode ierr;
4441   PetscInt       m,N,i,rstart,nnz,Ii;
4442   PetscInt       *indx;
4443   PetscScalar    *values;
4444 
4445   PetscFunctionBegin;
4446   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4447   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4448   for (i=0; i<m; i++) {
4449     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4450     Ii   = i + rstart;
4451     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4452     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4453   }
4454   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4455   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4456   PetscFunctionReturn(0);
4457 }
4458 
4459 #undef __FUNCT__
4460 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4461 /*@
4462       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4463                  matrices from each processor
4464 
4465     Collective on MPI_Comm
4466 
4467    Input Parameters:
4468 +    comm - the communicator the parallel matrix will live on
4469 .    inmat - the input sequential matrix (one per process)
4470 .    n - number of local columns (or PETSC_DECIDE)
4471 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4472 
4473    Output Parameter:
4474 .    outmat - the parallel matrix generated
4475 
4476     Level: advanced
4477 
4478    Notes: The number of columns of the matrix in EACH processor MUST be the same.
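
   A minimal usage sketch (each process contributes its own sequential matrix inmat) could be:

.vb
     Mat C;
     MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve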
4479 
4480 @*/
4481 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4482 {
4483   PetscErrorCode ierr;
4484 
4485   PetscFunctionBegin;
4486   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4487   if (scall == MAT_INITIAL_MATRIX) {
4488     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4489   }
4490   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4491   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4492   PetscFunctionReturn(0);
4493 }
4494 
4495 #undef __FUNCT__
4496 #define __FUNCT__ "MatFileSplit"
4497 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4498 {
4499   PetscErrorCode    ierr;
4500   PetscMPIInt       rank;
4501   PetscInt          m,N,i,rstart,nnz;
4502   size_t            len;
4503   const PetscInt    *indx;
4504   PetscViewer       out;
4505   char              *name;
4506   Mat               B;
4507   const PetscScalar *values;
4508 
4509   PetscFunctionBegin;
4510   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4511   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4512   /* Should this be the type of the diagonal block of A? */
4513   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4514   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4515   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4516   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4517   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4518   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4519   for (i=0; i<m; i++) {
4520     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4521     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4522     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4523   }
4524   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4525   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4526 
4527   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4528   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4529   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4530   sprintf(name,"%s.%d",outfile,rank);
4531   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4532   ierr = PetscFree(name);CHKERRQ(ierr);
4533   ierr = MatView(B,out);CHKERRQ(ierr);
4534   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4535   ierr = MatDestroy(&B);CHKERRQ(ierr);
4536   PetscFunctionReturn(0);
4537 }
4538 
4539 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4540 #undef __FUNCT__
4541 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4542 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4543 {
4544   PetscErrorCode      ierr;
4545   Mat_Merge_SeqsToMPI *merge;
4546   PetscContainer      container;
4547 
4548   PetscFunctionBegin;
4549   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4550   if (container) {
4551     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4552     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4553     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4554     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4555     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4556     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4557     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4558     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4559     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4560     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4561     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4562     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4563     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4564     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4565     ierr = PetscFree(merge);CHKERRQ(ierr);
4566     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4567   }
4568   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4569   PetscFunctionReturn(0);
4570 }
4571 
4572 #include <../src/mat/utils/freespace.h>
4573 #include <petscbt.h>
4574 
4575 #undef __FUNCT__
4576 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4577 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4578 {
4579   PetscErrorCode      ierr;
4580   MPI_Comm            comm;
4581   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4582   PetscMPIInt         size,rank,taga,*len_s;
4583   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4584   PetscInt            proc,m;
4585   PetscInt            **buf_ri,**buf_rj;
4586   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4587   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4588   MPI_Request         *s_waits,*r_waits;
4589   MPI_Status          *status;
4590   MatScalar           *aa=a->a;
4591   MatScalar           **abuf_r,*ba_i;
4592   Mat_Merge_SeqsToMPI *merge;
4593   PetscContainer      container;
4594 
4595   PetscFunctionBegin;
4596   ierr = PetscObjectGetComm((PetscObject)seqmat,&comm);CHKERRQ(ierr);
4597   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4598 
4599   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4600   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4601 
4602   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4603   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4604 
4605   bi     = merge->bi;
4606   bj     = merge->bj;
4607   buf_ri = merge->buf_ri;
4608   buf_rj = merge->buf_rj;
4609 
4610   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4611   owners = merge->rowmap->range;
4612   len_s  = merge->len_s;
4613 
4614   /* send and recv matrix values */
4615   /*-----------------------------*/
4616   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4617   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4618 
4619   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4620   for (proc=0,k=0; proc<size; proc++) {
4621     if (!len_s[proc]) continue;
4622     i    = owners[proc];
4623     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4624     k++;
4625   }
4626 
4627   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4628   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4629   ierr = PetscFree(status);CHKERRQ(ierr);
4630 
4631   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4632   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4633 
4634   /* insert mat values of mpimat */
4635   /*----------------------------*/
4636   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4637   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4638 
4639   for (k=0; k<merge->nrecv; k++) {
4640     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4641     nrows       = *(buf_ri_k[k]);
4642     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4643     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4644   }
4645 
4646   /* set values of ba */
4647   m = merge->rowmap->n;
4648   for (i=0; i<m; i++) {
4649     arow = owners[rank] + i;
4650     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4651     bnzi = bi[i+1] - bi[i];
4652     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4653 
4654     /* add local non-zero vals of this proc's seqmat into ba */
4655     anzi   = ai[arow+1] - ai[arow];
4656     aj     = a->j + ai[arow];
4657     aa     = a->a + ai[arow];
4658     nextaj = 0;
4659     for (j=0; nextaj<anzi; j++) {
4660       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4661         ba_i[j] += aa[nextaj++];
4662       }
4663     }
4664 
4665     /* add received vals into ba */
4666     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4667       /* i-th row */
4668       if (i == *nextrow[k]) {
4669         anzi   = *(nextai[k]+1) - *nextai[k];
4670         aj     = buf_rj[k] + *(nextai[k]);
4671         aa     = abuf_r[k] + *(nextai[k]);
4672         nextaj = 0;
4673         for (j=0; nextaj<anzi; j++) {
4674           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4675             ba_i[j] += aa[nextaj++];
4676           }
4677         }
4678         nextrow[k]++; nextai[k]++;
4679       }
4680     }
4681     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4682   }
4683   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4684   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4685 
4686   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4687   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4688   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4689   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4690   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4691   PetscFunctionReturn(0);
4692 }
4693 
4694 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4695 
4696 #undef __FUNCT__
4697 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4698 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4699 {
4700   PetscErrorCode      ierr;
4701   Mat                 B_mpi;
4702   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4703   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4704   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4705   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4706   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4707   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4708   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4709   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4710   MPI_Status          *status;
4711   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4712   PetscBT             lnkbt;
4713   Mat_Merge_SeqsToMPI *merge;
4714   PetscContainer      container;
4715 
4716   PetscFunctionBegin;
4717   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4718 
4719   /* make sure it is a PETSc comm */
4720   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4721   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4722   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4723 
4724   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4725   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4726 
4727   /* determine row ownership */
4728   /*---------------------------------------------------------*/
4729   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4730   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4731   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4732   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4733   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4734   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4735   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4736 
4737   m      = merge->rowmap->n;
4738   owners = merge->rowmap->range;
4739 
4740   /* determine the number of messages to send, their lengths */
4741   /*---------------------------------------------------------*/
4742   len_s = merge->len_s;
4743 
4744   len          = 0; /* length of buf_si[] */
4745   merge->nsend = 0;
4746   for (proc=0; proc<size; proc++) {
4747     len_si[proc] = 0;
4748     if (proc == rank) {
4749       len_s[proc] = 0;
4750     } else {
4751       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4752       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4753     }
4754     if (len_s[proc]) {
4755       merge->nsend++;
4756       nrows = 0;
4757       for (i=owners[proc]; i<owners[proc+1]; i++) {
4758         if (ai[i+1] > ai[i]) nrows++;
4759       }
4760       len_si[proc] = 2*(nrows+1);
4761       len         += len_si[proc];
4762     }
4763   }
4764 
4765   /* determine the number and length of messages to receive for ij-structure */
4766   /*-------------------------------------------------------------------------*/
4767   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4768   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4769 
4770   /* post the Irecv of j-structure */
4771   /*-------------------------------*/
4772   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4773   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4774 
4775   /* post the Isend of j-structure */
4776   /*--------------------------------*/
4777   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4778 
4779   for (proc=0, k=0; proc<size; proc++) {
4780     if (!len_s[proc]) continue;
4781     i    = owners[proc];
4782     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4783     k++;
4784   }
4785 
4786   /* receives and sends of j-structure are complete */
4787   /*------------------------------------------------*/
4788   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4789   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4790 
4791   /* send and recv i-structure */
4792   /*---------------------------*/
4793   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4794   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4795 
4796   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4797   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4798   for (proc=0,k=0; proc<size; proc++) {
4799     if (!len_s[proc]) continue;
4800     /* form outgoing message for i-structure:
4801          buf_si[0]:                 nrows to be sent
4802                [1:nrows]:           row index (global)
4803                [nrows+1:2*nrows+1]: i-structure index
4804     */
4805     /*-------------------------------------------*/
4806     nrows       = len_si[proc]/2 - 1;
4807     buf_si_i    = buf_si + nrows+1;
4808     buf_si[0]   = nrows;
4809     buf_si_i[0] = 0;
4810     nrows       = 0;
4811     for (i=owners[proc]; i<owners[proc+1]; i++) {
4812       anzi = ai[i+1] - ai[i];
4813       if (anzi) {
4814         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4815         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4816         nrows++;
4817       }
4818     }
4819     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4820     k++;
4821     buf_si += len_si[proc];
4822   }
4823 
4824   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4825   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4826 
4827   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4828   for (i=0; i<merge->nrecv; i++) {
4829     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4830   }
4831 
4832   ierr = PetscFree(len_si);CHKERRQ(ierr);
4833   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4834   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4835   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4836   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4837   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4838   ierr = PetscFree(status);CHKERRQ(ierr);
4839 
4840   /* compute a local seq matrix in each processor */
4841   /*----------------------------------------------*/
4842   /* allocate bi array and free space for accumulating nonzero column info */
4843   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4844   bi[0] = 0;
4845 
4846   /* create and initialize a linked list */
4847   nlnk = N+1;
4848   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4849 
4850   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4851   len  = ai[owners[rank+1]] - ai[owners[rank]];
4852   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4853 
4854   current_space = free_space;
4855 
4856   /* determine symbolic info for each local row */
4857   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4858 
4859   for (k=0; k<merge->nrecv; k++) {
4860     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4861     nrows       = *buf_ri_k[k];
4862     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4863     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4864   }
4865 
4866   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4867   len  = 0;
4868   for (i=0; i<m; i++) {
4869     bnzi = 0;
4870     /* add local non-zero cols of this proc's seqmat into lnk */
4871     arow  = owners[rank] + i;
4872     anzi  = ai[arow+1] - ai[arow];
4873     aj    = a->j + ai[arow];
4874     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4875     bnzi += nlnk;
4876     /* add received col data into lnk */
4877     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4878       if (i == *nextrow[k]) { /* i-th row */
4879         anzi  = *(nextai[k]+1) - *nextai[k];
4880         aj    = buf_rj[k] + *nextai[k];
4881         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4882         bnzi += nlnk;
4883         nextrow[k]++; nextai[k]++;
4884       }
4885     }
4886     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4887 
4888     /* if free space is not available, make more free space */
4889     if (current_space->local_remaining<bnzi) {
4890       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4891       nspacedouble++;
4892     }
4893     /* copy data into free space, then initialize lnk */
4894     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4895     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4896 
4897     current_space->array           += bnzi;
4898     current_space->local_used      += bnzi;
4899     current_space->local_remaining -= bnzi;
4900 
4901     bi[i+1] = bi[i] + bnzi;
4902   }
4903 
4904   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4905 
4906   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4907   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4908   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4909 
4910   /* create symbolic parallel matrix B_mpi */
4911   /*---------------------------------------*/
4912   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4913   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4914   if (n==PETSC_DECIDE) {
4915     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4916   } else {
4917     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4918   }
4919   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4920   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4921   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4922   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4923   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4924 
4925   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4926   B_mpi->assembled    = PETSC_FALSE;
4927   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4928   merge->bi           = bi;
4929   merge->bj           = bj;
4930   merge->buf_ri       = buf_ri;
4931   merge->buf_rj       = buf_rj;
4932   merge->coi          = NULL;
4933   merge->coj          = NULL;
4934   merge->owners_co    = NULL;
4935 
4936   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4937 
4938   /* attach the supporting struct to B_mpi for reuse */
4939   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4940   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4941   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4942   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4943   *mpimat = B_mpi;
4944 
4945   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4946   PetscFunctionReturn(0);
4947 }
4948 
4949 #undef __FUNCT__
4950 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4951 /*@C
4952       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4953                  matrices from each processor
4954 
4955     Collective on MPI_Comm
4956 
4957    Input Parameters:
4958 +    comm - the communicator the parallel matrix will live on
4959 .    seqmat - the input sequential matrix (one per process)
4960 .    m - number of local rows (or PETSC_DECIDE)
4961 .    n - number of local columns (or PETSC_DECIDE)
4962 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4963 
4964    Output Parameter:
4965 .    mpimat - the parallel matrix generated
4966 
4967     Level: advanced
4968 
4969    Notes:
4970      The dimensions of the sequential matrix in each processor MUST be the same.
4971      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4972      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
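
   A minimal usage sketch (each process contributes its own seqmat) could be:

.vb
     Mat C;
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve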
4973 @*/
4974 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4975 {
4976   PetscErrorCode ierr;
4977   PetscMPIInt    size;
4978 
4979   PetscFunctionBegin;
4980   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4981   if (size == 1) {
4982     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4983     if (scall == MAT_INITIAL_MATRIX) {
4984       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4985     } else {
4986       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4987     }
4988     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4989     PetscFunctionReturn(0);
4990   }
4991   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4992   if (scall == MAT_INITIAL_MATRIX) {
4993     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4994   }
4995   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4996   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4997   PetscFunctionReturn(0);
4998 }
4999 
5000 #undef __FUNCT__
5001 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5002 /*@
5003      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5004           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5005           with MatGetSize()
5006 
5007     Not Collective
5008 
5009    Input Parameters:
5010 +    A - the matrix
5011 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5012 
5013    Output Parameter:
5014 .    A_loc - the local sequential matrix generated
5015 
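   A minimal usage sketch could be:

.vb
     Mat Aloc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
     /* ... use Aloc, then destroy it when no longer needed ... */
     MatDestroy(&Aloc);
.ve
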
5016     Level: developer
5017 
5018 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5019 
5020 @*/
5021 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5022 {
5023   PetscErrorCode ierr;
5024   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5025   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5026   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5027   MatScalar      *aa=a->a,*ba=b->a,*cam;
5028   PetscScalar    *ca;
5029   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5030   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5031   PetscBool      match;
5032 
5033   PetscFunctionBegin;
5034   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5035   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5036   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5037   if (scall == MAT_INITIAL_MATRIX) {
5038     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5039     ci[0] = 0;
5040     for (i=0; i<am; i++) {
5041       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5042     }
5043     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5044     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5045     k    = 0;
5046     for (i=0; i<am; i++) {
5047       ncols_o = bi[i+1] - bi[i];
5048       ncols_d = ai[i+1] - ai[i];
5049       /* off-diagonal portion of A */
5050       for (jo=0; jo<ncols_o; jo++) {
5051         col = cmap[*bj];
5052         if (col >= cstart) break;
5053         cj[k]   = col; bj++;
5054         ca[k++] = *ba++;
5055       }
5056       /* diagonal portion of A */
5057       for (j=0; j<ncols_d; j++) {
5058         cj[k]   = cstart + *aj++;
5059         ca[k++] = *aa++;
5060       }
5061       /* off-diagonal portion of A */
5062       for (j=jo; j<ncols_o; j++) {
5063         cj[k]   = cmap[*bj++];
5064         ca[k++] = *ba++;
5065       }
5066     }
5067     /* put together the new matrix */
5068     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5069     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5070     /* Since these are PETSc arrays, change flags to free them as necessary. */
5071     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5072     mat->free_a  = PETSC_TRUE;
5073     mat->free_ij = PETSC_TRUE;
5074     mat->nonew   = 0;
5075   } else if (scall == MAT_REUSE_MATRIX) {
5076     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5077     ci = mat->i; cj = mat->j; cam = mat->a;
5078     for (i=0; i<am; i++) {
5079       /* off-diagonal portion of A */
5080       ncols_o = bi[i+1] - bi[i];
5081       for (jo=0; jo<ncols_o; jo++) {
5082         col = cmap[*bj];
5083         if (col >= cstart) break;
5084         *cam++ = *ba++; bj++;
5085       }
5086       /* diagonal portion of A */
5087       ncols_d = ai[i+1] - ai[i];
5088       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5089       /* off-diagonal portion of A */
5090       for (j=jo; j<ncols_o; j++) {
5091         *cam++ = *ba++; bj++;
5092       }
5093     }
5094   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5095   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5096   PetscFunctionReturn(0);
5097 }
5098 
5099 #undef __FUNCT__
5100 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5101 /*@C
5102      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5103 
5104     Not Collective
5105 
5106    Input Parameters:
5107 +    A - the matrix
5108 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5109 -    row, col - index sets of rows and columns to extract (or NULL)
5110 
5111    Output Parameter:
5112 .    A_loc - the local sequential matrix generated
5113 
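   A minimal usage sketch (passing NULL for row and col to use all local rows and all nonzero
   columns) could be:

.vb
     Mat Aloc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);
.ve
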
5114     Level: developer
5115 
5116 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5117 
5118 @*/
5119 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5120 {
5121   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5122   PetscErrorCode ierr;
5123   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5124   IS             isrowa,iscola;
5125   Mat            *aloc;
5126   PetscBool      match;
5127 
5128   PetscFunctionBegin;
5129   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5130   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5131   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5132   if (!row) {
5133     start = A->rmap->rstart; end = A->rmap->rend;
5134     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5135   } else {
5136     isrowa = *row;
5137   }
5138   if (!col) {
5139     start = A->cmap->rstart;
5140     cmap  = a->garray;
5141     nzA   = a->A->cmap->n;
5142     nzB   = a->B->cmap->n;
5143     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5144     ncols = 0;
5145     for (i=0; i<nzB; i++) {
5146       if (cmap[i] < start) idx[ncols++] = cmap[i];
5147       else break;
5148     }
5149     imark = i;
5150     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5151     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5152     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5153   } else {
5154     iscola = *col;
5155   }
5156   if (scall != MAT_INITIAL_MATRIX) {
5157     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5158     aloc[0] = *A_loc;
5159   }
5160   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5161   *A_loc = aloc[0];
5162   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5163   if (!row) {
5164     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5165   }
5166   if (!col) {
5167     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5168   }
5169   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5170   PetscFunctionReturn(0);
5171 }
5172 
5173 #undef __FUNCT__
5174 #define __FUNCT__ "MatGetBrowsOfAcols"
5175 /*@C
5176     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5177 
5178     Collective on Mat
5179 
5180    Input Parameters:
5181 +    A,B - the matrices in mpiaij format
5182 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5183 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5184 
5185    Output Parameter:
5186 +    rowb, colb - index sets of rows and columns of B to extract
5187 -    B_seq - the sequential matrix generated
5188 
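   A minimal usage sketch (with MAT_INITIAL_MATRIX the routine creates the index sets and returns
   them so that they can be reused in a later MAT_REUSE_MATRIX call) could be:

.vb
     IS  rowb,colb;
     Mat Bseq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);
.ve
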
5189     Level: developer
5190 
5191 @*/
5192 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5193 {
5194   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5195   PetscErrorCode ierr;
5196   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5197   IS             isrowb,iscolb;
5198   Mat            *bseq=NULL;
5199 
5200   PetscFunctionBegin;
5201   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5202     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5203   }
5204   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5205 
5206   if (scall == MAT_INITIAL_MATRIX) {
5207     start = A->cmap->rstart;
5208     cmap  = a->garray;
5209     nzA   = a->A->cmap->n;
5210     nzB   = a->B->cmap->n;
5211     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5212     ncols = 0;
5213     for (i=0; i<nzB; i++) {  /* rows of B owned by processes before this one */
5214       if (cmap[i] < start) idx[ncols++] = cmap[i];
5215       else break;
5216     }
5217     imark = i;
5218     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5219     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows of B owned by processes after this one */
5220     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5221     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5222   } else {
5223     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5224     isrowb  = *rowb; iscolb = *colb;
5225     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5226     bseq[0] = *B_seq;
5227   }
5228   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5229   *B_seq = bseq[0];
5230   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5231   if (!rowb) {
5232     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5233   } else {
5234     *rowb = isrowb;
5235   }
5236   if (!colb) {
5237     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5238   } else {
5239     *colb = iscolb;
5240   }
5241   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5242   PetscFunctionReturn(0);
5243 }
5244 
5245 #undef __FUNCT__
5246 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5247 /*
5248     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5249     of the OFF-DIAGONAL portion of the local A
5250 
5251     Collective on Mat
5252 
5253    Input Parameters:
5254 +    A,B - the matrices in mpiaij format
5255 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5256 
5257    Output Parameters:
5258 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5259 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5260 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5261 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5262 
5263     Level: developer
5264 
5265 */
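/*
   Illustrative driver sketch (hypothetical caller, e.g. a parallel matrix-matrix product): the
   first call builds the communication pattern and B_oth; later calls with MAT_REUSE_MATRIX only
   move the updated numerical values of B.

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa      = NULL;
     Mat       B_oth      = NULL;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/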
5266 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5267 {
5268   VecScatter_MPI_General *gen_to,*gen_from;
5269   PetscErrorCode         ierr;
5270   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5271   Mat_SeqAIJ             *b_oth;
5272   VecScatter             ctx =a->Mvctx;
5273   MPI_Comm               comm;
5274   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5275   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5276   PetscScalar            *rvalues,*svalues;
5277   MatScalar              *b_otha,*bufa,*bufA;
5278   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5279   MPI_Request            *rwaits = NULL,*swaits = NULL;
5280   MPI_Status             *sstatus,rstatus;
5281   PetscMPIInt            jj;
5282   PetscInt               *cols,sbs,rbs;
5283   PetscScalar            *vals;
5284 
5285   PetscFunctionBegin;
5286   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5287   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5288     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5289   }
5290   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5291   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5292 
5293   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5294   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5295   rvalues  = gen_from->values; /* reused to hold the lengths of the rows to be received */
5296   svalues  = gen_to->values;   /* reused to hold the lengths of the rows to be sent */
5297   nrecvs   = gen_from->n;
5298   nsends   = gen_to->n;
5299 
5300   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5301   srow    = gen_to->indices;    /* local row index to be sent */
5302   sstarts = gen_to->starts;
5303   sprocs  = gen_to->procs;
5304   sstatus = gen_to->sstatus;
5305   sbs     = gen_to->bs;
5306   rstarts = gen_from->starts;
5307   rprocs  = gen_from->procs;
5308   rbs     = gen_from->bs;
5309 
5310   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5311   if (scall == MAT_INITIAL_MATRIX) {
5312     /* i-array */
5313     /*---------*/
5314     /*  post receives */
5315     for (i=0; i<nrecvs; i++) {
5316       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5317       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5318       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5319     }
5320 
5321     /* pack the outgoing message */
5322     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5323 
5324     sstartsj[0] = 0;
5325     rstartsj[0] = 0;
5326     len         = 0; /* total length of j or a array to be sent */
5327     k           = 0;
5328     for (i=0; i<nsends; i++) {
5329       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5330       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5331       for (j=0; j<nrows; j++) {
5332         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5333         for (l=0; l<sbs; l++) {
5334           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5335 
5336           rowlen[j*sbs+l] = ncols;
5337 
5338           len += ncols;
5339           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5340         }
5341         k++;
5342       }
5343       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5344 
5345       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5346     }
5347     /* recvs and sends of i-array are completed */
5348     i = nrecvs;
5349     while (i--) {
5350       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5351     }
5352     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5353 
5354     /* allocate buffers for sending j and a arrays */
5355     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5356     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5357 
5358     /* create i-array of B_oth */
5359     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5360 
5361     b_othi[0] = 0;
5362     len       = 0; /* total length of j or a array to be received */
5363     k         = 0;
5364     for (i=0; i<nrecvs; i++) {
5365       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5366       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5367       for (j=0; j<nrows; j++) {
5368         b_othi[k+1] = b_othi[k] + rowlen[j];
5369         len        += rowlen[j]; k++;
5370       }
5371       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5372     }
5373 
5374     /* allocate space for j and a arrays of B_oth */
5375     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5376     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5377 
5378     /* j-array */
5379     /*---------*/
5380     /*  post receives of j-array */
5381     for (i=0; i<nrecvs; i++) {
5382       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5383       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5384     }
5385 
5386     /* pack the outgoing message j-array */
5387     k = 0;
5388     for (i=0; i<nsends; i++) {
5389       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5390       bufJ  = bufj+sstartsj[i];
5391       for (j=0; j<nrows; j++) {
5392         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5393         for (ll=0; ll<sbs; ll++) {
5394           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5395           for (l=0; l<ncols; l++) {
5396             *bufJ++ = cols[l];
5397           }
5398           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5399         }
5400       }
5401       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5402     }
5403 
5404     /* recvs and sends of j-array are completed */
5405     i = nrecvs;
5406     while (i--) {
5407       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5408     }
5409     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5410   } else if (scall == MAT_REUSE_MATRIX) {
5411     sstartsj = *startsj_s;
5412     rstartsj = *startsj_r;
5413     bufa     = *bufa_ptr;
5414     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5415     b_otha   = b_oth->a;
5416   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Unsupported MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5417 
5418   /* a-array */
5419   /*---------*/
5420   /*  post receives of a-array */
5421   for (i=0; i<nrecvs; i++) {
5422     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5423     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5424   }
5425 
5426   /* pack the outgoing message a-array */
5427   k = 0;
5428   for (i=0; i<nsends; i++) {
5429     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5430     bufA  = bufa+sstartsj[i];
5431     for (j=0; j<nrows; j++) {
5432       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5433       for (ll=0; ll<sbs; ll++) {
5434         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5435         for (l=0; l<ncols; l++) {
5436           *bufA++ = vals[l];
5437         }
5438         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5439       }
5440     }
5441     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5442   }
5443   /* recvs and sends of a-array are completed */
5444   i = nrecvs;
5445   while (i--) {
5446     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5447   }
5448   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5449   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5450 
5451   if (scall == MAT_INITIAL_MATRIX) {
5452     /* put together the new matrix */
5453     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5454 
5455     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5456     /* Since these are PETSc arrays, change flags to free them as necessary. */
5457     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5458     b_oth->free_a  = PETSC_TRUE;
5459     b_oth->free_ij = PETSC_TRUE;
5460     b_oth->nonew   = 0;
5461 
5462     ierr = PetscFree(bufj);CHKERRQ(ierr);
5463     if (!startsj_s || !bufa_ptr) {
5464       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5465       ierr = PetscFree(bufa);CHKERRQ(ierr);
5466     } else {
5467       *startsj_s = sstartsj;
5468       *startsj_r = rstartsj;
5469       *bufa_ptr  = bufa;
5470     }
5471   }
5472   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5473   PetscFunctionReturn(0);
5474 }
5475 
5476 #undef __FUNCT__
5477 #define __FUNCT__ "MatGetCommunicationStructs"
5478 /*@C
5479   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5480 
5481   Not Collective
5482 
5483   Input Parameters:
5484 . A - The matrix in mpiaij format
5485 
5486   Output Parameters:
5487 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5488 . colmap - A map from global column index to local index into lvec
5489 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5490 
5491   Level: developer
5492 
5493 @*/
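/*
   Usage sketch (illustrative only).  The returned objects are owned by the matrix and must not be
   destroyed by the caller; the type of colmap depends on PETSC_USE_CTABLE, as in the prototypes below.

     Vec        lvec;
     VecScatter scatter;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
*/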
5494 #if defined(PETSC_USE_CTABLE)
5495 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5496 #else
5497 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5498 #endif
5499 {
5500   Mat_MPIAIJ *a;
5501 
5502   PetscFunctionBegin;
5503   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5504   PetscValidPointer(lvec, 2);
5505   PetscValidPointer(colmap, 3);
5506   PetscValidPointer(multScatter, 4);
5507   a = (Mat_MPIAIJ*) A->data;
5508   if (lvec) *lvec = a->lvec;
5509   if (colmap) *colmap = a->colmap;
5510   if (multScatter) *multScatter = a->Mvctx;
5511   PetscFunctionReturn(0);
5512 }
5513 
5514 EXTERN_C_BEGIN
5515 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5516 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5517 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5518 EXTERN_C_END
5519 
5520 #undef __FUNCT__
5521 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5522 /*
5523     Computes C = (B'*A')' = A*B, since computing A*B directly with a parallel dense A and a parallel AIJ B is untenable
5524 
5525                n                       p                          p
5526         (              )       (              )         (                  )
5527       m (      A       )  *  n (       B      )   =   m (         C        )
5528         (              )       (              )         (                  )
5529 
5530 */
5531 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5532 {
5533   PetscErrorCode ierr;
5534   Mat            At,Bt,Ct;
5535 
5536   PetscFunctionBegin;
5537   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5538   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5539   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5540   ierr = MatDestroy(&At);CHKERRQ(ierr);
5541   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5542   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5543   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5544   PetscFunctionReturn(0);
5545 }
5546 
5547 #undef __FUNCT__
5548 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5549 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5550 {
5551   PetscErrorCode ierr;
5552   PetscInt       m=A->rmap->n,n=B->cmap->n;
5553   Mat            Cmat;
5554 
5555   PetscFunctionBegin;
5556   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5557   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5558   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5559   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5560   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5561   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5562   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5563   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5564 
5565   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5566 
5567   *C = Cmat;
5568   PetscFunctionReturn(0);
5569 }
5570 
5571 /* ----------------------------------------------------------------*/
5572 #undef __FUNCT__
5573 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5574 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5575 {
5576   PetscErrorCode ierr;
5577 
5578   PetscFunctionBegin;
5579   if (scall == MAT_INITIAL_MATRIX) {
5580     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5581   }
5582   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5583   PetscFunctionReturn(0);
5584 }
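/*
   Usage sketch (illustrative only): a product C = A*B with A of type MATMPIDENSE and B of type
   MATMPIAIJ is normally requested through MatMatMult(), which dispatches to the routines above;
   MAT_REUSE_MATRIX reruns only the numeric phase.

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ...
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/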
5585 
5586 EXTERN_C_BEGIN
5587 #if defined(PETSC_HAVE_MUMPS)
5588 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5589 #endif
5590 #if defined(PETSC_HAVE_PASTIX)
5591 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5592 #endif
5593 #if defined(PETSC_HAVE_SUPERLU_DIST)
5594 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5595 #endif
5596 #if defined(PETSC_HAVE_CLIQUE)
5597 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5598 #endif
5599 EXTERN_C_END
5600 
5601 /*MC
5602    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5603 
5604    Options Database Keys:
5605 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5606 
5607   Level: beginner
5608 
5609 .seealso: MatCreateAIJ()
5610 M*/
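/*
   Example of creating a matrix of this type explicitly (a sketch; MatCreateAIJ() is the usual
   convenience routine, and m, n, M, N and the preallocation arguments are application dependent):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
*/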
5611 
5612 EXTERN_C_BEGIN
5613 #undef __FUNCT__
5614 #define __FUNCT__ "MatCreate_MPIAIJ"
5615 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5616 {
5617   Mat_MPIAIJ     *b;
5618   PetscErrorCode ierr;
5619   PetscMPIInt    size;
5620 
5621   PetscFunctionBegin;
5622   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5623 
5624   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5625   B->data       = (void*)b;
5626   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5627   B->assembled  = PETSC_FALSE;
5628   B->insertmode = NOT_SET_VALUES;
5629   b->size       = size;
5630 
5631   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5632 
5633   /* build cache for off array entries formed */
5634   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5635 
5636   b->donotstash  = PETSC_FALSE;
5637   b->colmap      = 0;
5638   b->garray      = 0;
5639   b->roworiented = PETSC_TRUE;
5640 
5641   /* stuff used for matrix vector multiply */
5642   b->lvec  = NULL;
5643   b->Mvctx = NULL;
5644 
5645   /* stuff for MatGetRow() */
5646   b->rowindices   = 0;
5647   b->rowvalues    = 0;
5648   b->getrowactive = PETSC_FALSE;
5649 
5650   /* flexible pointer used in CUSP/CUSPARSE classes */
5651   b->spptr = NULL;
5652 
5653 #if defined(PETSC_HAVE_MUMPS)
5654   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5655                                            "MatGetFactor_aij_mumps",
5656                                            MatGetFactor_aij_mumps);CHKERRQ(ierr);
5657 #endif
5658 #if defined(PETSC_HAVE_PASTIX)
5659   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5660                                            "MatGetFactor_mpiaij_pastix",
5661                                            MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5662 #endif
5663 #if defined(PETSC_HAVE_SUPERLU_DIST)
5664   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5665                                            "MatGetFactor_mpiaij_superlu_dist",
5666                                            MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5667 #endif
5668 #if defined(PETSC_HAVE_CLIQUE)
5669   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5670                                            "MatGetFactor_aij_clique",
5671                                            MatGetFactor_aij_clique);CHKERRQ(ierr);
5672 #endif
5673   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5674                                            "MatStoreValues_MPIAIJ",
5675                                            MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5676   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5677                                            "MatRetrieveValues_MPIAIJ",
5678                                            MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5679   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5680                                            "MatGetDiagonalBlock_MPIAIJ",
5681                                            MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5682   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5683                                            "MatIsTranspose_MPIAIJ",
5684                                            MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5685   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5686                                            "MatMPIAIJSetPreallocation_MPIAIJ",
5687                                            MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5688   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5689                                            "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5690                                            MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5691   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5692                                            "MatDiagonalScaleLocal_MPIAIJ",
5693                                            MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5694   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5695                                            "MatConvert_MPIAIJ_MPIAIJPERM",
5696                                            MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5697   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5698                                            "MatConvert_MPIAIJ_MPIAIJCRL",
5699                                            MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5700   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5701                                            "MatConvert_MPIAIJ_MPISBAIJ",
5702                                            MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5703   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5704                                            "MatMatMult_MPIDense_MPIAIJ",
5705                                            MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5706   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5707                                            "MatMatMultSymbolic_MPIDense_MPIAIJ",
5708                                            MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5709   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5710                                            "MatMatMultNumeric_MPIDense_MPIAIJ",
5711                                            MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5712   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5713   PetscFunctionReturn(0);
5714 }
5715 EXTERN_C_END
5716 
5717 #undef __FUNCT__
5718 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5719 /*@
5720      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5721          and "off-diagonal" part of the matrix in CSR format.
5722 
5723    Collective on MPI_Comm
5724 
5725    Input Parameters:
5726 +  comm - MPI communicator
5727 .  m - number of local rows (Cannot be PETSC_DECIDE)
5728 .  n - This value should be the same as the local size used in creating the
5729        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5730        calculated if N is given) For square matrices n is almost always m.
5731 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5732 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5733 .   i - row indices for "diagonal" portion of matrix
5734 .   j - column indices for "diagonal" portion (local numbering, i.e. relative to the start of this process's column ownership range)
5735 .   a - matrix values
5736 .   oi - row indices for "off-diagonal" portion of matrix
5737 .   oj - column indices for "off-diagonal" portion (global column numbering)
5738 -   oa - matrix values
5739 
5740    Output Parameter:
5741 .   mat - the matrix
5742 
5743    Level: advanced
5744 
5745    Notes:
5746        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5747        must free the arrays once the matrix has been destroyed and not before.
5748 
5749        The i and j indices are 0 based
5750 
5751        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5752 
5753        This sets local rows and cannot be used to set off-processor values.
5754 
5755        You cannot later use MatSetValues() to change values in this matrix.
5756 
5757 .keywords: matrix, aij, compressed row, sparse, parallel
5758 
5759 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5760           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5761 @*/
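/*
   Usage sketch (hypothetical arrays): each process passes the CSR triples of its "diagonal" block
   (i,j,a; see the column-numbering notes above) and of its "off-diagonal" block (oi,oj,oa), both
   with 0-based row pointers, and keeps the six arrays alive until the matrix is destroyed.

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ...  use A  ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
     ...  only now may i,j,a,oi,oj,oa be freed  ...
*/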
5762 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5763 {
5764   PetscErrorCode ierr;
5765   Mat_MPIAIJ     *maij;
5766 
5767   PetscFunctionBegin;
5768   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5769   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5770   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5771   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5772   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5773   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5774   maij = (Mat_MPIAIJ*) (*mat)->data;
5775 
5776   maij->donotstash     = PETSC_TRUE;
5777   (*mat)->preallocated = PETSC_TRUE;
5778 
5779   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5780   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5781 
5782   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5783   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5784 
5785   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5786   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5787   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5788   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5789 
5790   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5791   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5792   PetscFunctionReturn(0);
5793 }
5794 
5795 /*
5796     Special version for direct calls from Fortran
5797 */
5798 #include <petsc-private/fortranimpl.h>
5799 
5800 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5801 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5802 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5803 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5804 #endif
5805 
5806 /* Change these macros so that they can be used in a void function */
5807 #undef CHKERRQ
5808 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5809 #undef SETERRQ2
5810 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5811 #undef SETERRQ3
5812 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5813 #undef SETERRQ
5814 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5815 
5816 EXTERN_C_BEGIN
5817 #undef __FUNCT__
5818 #define __FUNCT__ "matsetvaluesmpiaij_"
5819 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5820 {
5821   Mat            mat  = *mmat;
5822   PetscInt       m    = *mm, n = *mn;
5823   InsertMode     addv = *maddv;
5824   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5825   PetscScalar    value;
5826   PetscErrorCode ierr;
5827 
5828   MatCheckPreallocated(mat,1);
5829   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5830 
5831 #if defined(PETSC_USE_DEBUG)
5832   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5833 #endif
5834   {
5835     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5836     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5837     PetscBool roworiented = aij->roworiented;
5838 
5839     /* Some Variables required in the macro */
5840     Mat        A                 = aij->A;
5841     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5842     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5843     MatScalar  *aa               = a->a;
5844     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5845     Mat        B                 = aij->B;
5846     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5847     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5848     MatScalar  *ba               = b->a;
5849 
5850     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5851     PetscInt  nonew = a->nonew;
5852     MatScalar *ap1,*ap2;
5853 
5854     PetscFunctionBegin;
5855     for (i=0; i<m; i++) {
5856       if (im[i] < 0) continue;
5857 #if defined(PETSC_USE_DEBUG)
5858       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5859 #endif
5860       if (im[i] >= rstart && im[i] < rend) {
5861         row      = im[i] - rstart;
5862         lastcol1 = -1;
5863         rp1      = aj + ai[row];
5864         ap1      = aa + ai[row];
5865         rmax1    = aimax[row];
5866         nrow1    = ailen[row];
5867         low1     = 0;
5868         high1    = nrow1;
5869         lastcol2 = -1;
5870         rp2      = bj + bi[row];
5871         ap2      = ba + bi[row];
5872         rmax2    = bimax[row];
5873         nrow2    = bilen[row];
5874         low2     = 0;
5875         high2    = nrow2;
5876 
5877         for (j=0; j<n; j++) {
5878           if (roworiented) value = v[i*n+j];
5879           else value = v[i+j*m];
5880           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5881           if (in[j] >= cstart && in[j] < cend) {
5882             col = in[j] - cstart;
5883             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5884           } else if (in[j] < 0) continue;
5885 #if defined(PETSC_USE_DEBUG)
5886           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5887 #endif
5888           else {
5889             if (mat->was_assembled) {
5890               if (!aij->colmap) {
5891                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5892               }
5893 #if defined(PETSC_USE_CTABLE)
5894               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5895               col--;
5896 #else
5897               col = aij->colmap[in[j]] - 1;
5898 #endif
5899               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5900                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5901                 col  =  in[j];
5902                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5903                 B     = aij->B;
5904                 b     = (Mat_SeqAIJ*)B->data;
5905                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5906                 rp2   = bj + bi[row];
5907                 ap2   = ba + bi[row];
5908                 rmax2 = bimax[row];
5909                 nrow2 = bilen[row];
5910                 low2  = 0;
5911                 high2 = nrow2;
5912                 bm    = aij->B->rmap->n;
5913                 ba    = b->a;
5914               }
5915             } else col = in[j];
5916             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5917           }
5918         }
5919       } else if (!aij->donotstash) {
5920         if (roworiented) {
5921           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5922         } else {
5923           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5924         }
5925       }
5926     }
5927   }
5928   PetscFunctionReturnVoid();
5929 }
5930 EXTERN_C_END
5931 
5932