xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 7c8652dd9fb051dfaf30896d504f41ba028df3ea)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
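   Example (a minimal sketch; comm, A, the sizes m, n, M, N, and the preallocation
   counts d_nz and o_nz are placeholders):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,d_nz,NULL);
   MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
.ve
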
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically
20    switches over to using inodes when enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
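  /* First count the locally owned rows whose diagonal and off-diagonal parts are
     structurally empty or contain only numerically zero entries; only if some
     process has such a row is the index set of kept rows actually built. */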
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
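  /* accumulate this process's contributions into a work array of global length n,
     then combine across processes with an Allreduce: MAX for the infinity norm,
     SUM for the 1- and 2-norms (square roots are taken at the end for the 2-norm) */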
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
182 */
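/*
   Sketch of the scheme: for MAT_INITIAL_MATRIX process 0 ships each process its
   row lengths, then its column indices, then its numerical values; each process
   derives its diagonal/off-diagonal preallocation from the received structure and
   inserts the values with MatSetValues(). For MAT_REUSE_MATRIX only the numerical
   values travel and are copied directly into the diagonal (A) and off-diagonal (B)
   parts.
*/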
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     if (!rank) {
206       bses[0] = gmat->rmap->bs;
207       bses[1] = gmat->cmap->bs;
208     }
209     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
210     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
211     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
212     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
213     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
214     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
215 
216     rowners[0] = 0;
217     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
218     rstart = rowners[rank];
219     rend   = rowners[rank+1];
220     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
221     if (!rank) {
222       gmata = (Mat_SeqAIJ*) gmat->data;
223       /* send row lengths to all processors */
224       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
225       for (i=1; i<size; i++) {
226         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
227       }
228       /* determine the diagonal and off-diagonal nonzero counts */
229       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
230       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
231       jj   = 0;
232       for (i=0; i<m; i++) {
233         for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236           jj++;
237         }
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the diagonal and off-diagonal nonzero counts */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
264       jj   = 0;
265       for (i=0; i<m; i++) {
266         for (j=0; j<dlens[i]; j++) {
267           if (gmataj[jj] < rstart) ld[i]++;
268           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
269           jj++;
270         }
271       }
272       /* receive numerical values */
273       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
274       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
275     }
276     /* set preallocation */
277     for (i=0; i<m; i++) {
278       dlens[i] -= olens[i];
279     }
280     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
281     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
282 
283     for (i=0; i<m; i++) {
284       dlens[i] += olens[i];
285     }
286     cnt = 0;
287     for (i=0; i<m; i++) {
288       row  = rstart + i;
289       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
290       cnt += dlens[i];
291     }
292     if (rank) {
293       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
294     }
295     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
296     ierr = PetscFree(rowners);CHKERRQ(ierr);
297 
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299 
300     *inmat = mat;
301   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
302     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
303     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
304     mat  = *inmat;
305     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
306     if (!rank) {
307       /* send numerical values to other processes */
308       gmata  = (Mat_SeqAIJ*) gmat->data;
309       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
310       gmataa = gmata->a;
311       for (i=1; i<size; i++) {
312         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
313         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
314       }
315       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
316     } else {
317       /* receive numerical values from process 0 */
318       nz   = Ad->nz + Ao->nz;
319       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
320       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
321     }
322     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
323     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
324     ad = Ad->a;
325     ao = Ao->a;
326     if (mat->rmap->n) {
327       i  = 0;
328       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
329       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
330     }
331     for (i=1; i<mat->rmap->n; i++) {
332       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
333       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
334     }
335     i--;
336     if (mat->rmap->n) {
337       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
338     }
339     if (rank) {
340       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
341     }
342   }
343   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 /*
349   Local utility routine that creates a mapping from the global column
350 number to the local number in the off-diagonal part of the local
351 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
352 a slightly higher hash table cost; without it, it is not scalable (each process
353 has an order-N integer array, though access is fast).
354 */
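/*
   For example, if the off-diagonal part references global columns {3, 17, 42},
   then garray = {3, 17, 42} and the colmap sends 3 -> 1, 17 -> 2, 42 -> 3;
   the map is shifted by one so that a lookup result of 0 can mean "column not present".
*/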
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined(PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++) {
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
373   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
374 #endif
375   PetscFunctionReturn(0);
376 }
377 
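/*
   The two macros below insert (or add) one value into the diagonal (A) or
   off-diagonal (B) SeqAIJ block of a row: a truncated binary search narrows the
   range, a short linear scan locates the column, and if the entry is absent the
   row is reallocated (when allowed) and later entries are shifted up to make room.
*/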
378 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
379 { \
380     if (col <= lastcol1)  low1 = 0;     \
381     else                 high1 = nrow1; \
382     lastcol1 = col;\
383     while (high1-low1 > 5) { \
384       t = (low1+high1)/2; \
385       if (rp1[t] > col) high1 = t; \
386       else              low1  = t; \
387     } \
388       for (_i=low1; _i<high1; _i++) { \
389         if (rp1[_i] > col) break; \
390         if (rp1[_i] == col) { \
391           if (addv == ADD_VALUES) ap1[_i] += value;   \
392           else                    ap1[_i] = value; \
393           goto a_noinsert; \
394         } \
395       }  \
396       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
397       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
399       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
400       N = nrow1++ - 1; a->nz++; high1++; \
401       /* shift up all the later entries in this row */ \
402       for (ii=N; ii>=_i; ii--) { \
403         rp1[ii+1] = rp1[ii]; \
404         ap1[ii+1] = ap1[ii]; \
405       } \
406       rp1[_i] = col;  \
407       ap1[_i] = value;  \
408       a_noinsert: ; \
409       ailen[row] = nrow1; \
410 }
411 
412 
413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
414   { \
415     if (col <= lastcol2) low2 = 0;                        \
416     else high2 = nrow2;                                   \
417     lastcol2 = col;                                       \
418     while (high2-low2 > 5) {                              \
419       t = (low2+high2)/2;                                 \
420       if (rp2[t] > col) high2 = t;                        \
421       else             low2  = t;                         \
422     }                                                     \
423     for (_i=low2; _i<high2; _i++) {                       \
424       if (rp2[_i] > col) break;                           \
425       if (rp2[_i] == col) {                               \
426         if (addv == ADD_VALUES) ap2[_i] += value;         \
427         else                    ap2[_i] = value;          \
428         goto b_noinsert;                                  \
429       }                                                   \
430     }                                                     \
431     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
432     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
433     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
434     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
435     N = nrow2++ - 1; b->nz++; high2++;                    \
436     /* shift up all the later entries in this row */      \
437     for (ii=N; ii>=_i; ii--) {                            \
438       rp2[ii+1] = rp2[ii];                                \
439       ap2[ii+1] = ap2[ii];                                \
440     }                                                     \
441     rp2[_i] = col;                                        \
442     ap2[_i] = value;                                      \
443     b_noinsert: ;                                         \
444     bilen[row] = nrow2;                                   \
445   }
446 
447 #undef __FUNCT__
448 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
449 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
450 {
451   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
452   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
453   PetscErrorCode ierr;
454   PetscInt       l,*garray = mat->garray,diag;
455 
456   PetscFunctionBegin;
457   /* code only works for square matrices A */
458 
459   /* find size of row to the left of the diagonal part */
460   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
461   row  = row - diag;
462   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
463     if (garray[b->j[b->i[row]+l]] > diag) break;
464   }
465   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
466 
467   /* diagonal part */
468   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
469 
470   /* right of diagonal part */
471   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
472   PetscFunctionReturn(0);
473 }
474 
475 #undef __FUNCT__
476 #define __FUNCT__ "MatSetValues_MPIAIJ"
477 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
478 {
479   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
480   PetscScalar    value;
481   PetscErrorCode ierr;
482   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
483   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
484   PetscBool      roworiented = aij->roworiented;
485 
486   /* Some Variables required in the macro */
487   Mat        A                 = aij->A;
488   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
489   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
490   MatScalar  *aa               = a->a;
491   PetscBool  ignorezeroentries = a->ignorezeroentries;
492   Mat        B                 = aij->B;
493   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
494   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
495   MatScalar  *ba               = b->a;
496 
497   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
498   PetscInt  nonew;
499   MatScalar *ap1,*ap2;
500 
501   PetscFunctionBegin;
502   if (v) PetscValidScalarPointer(v,6);
503   for (i=0; i<m; i++) {
504     if (im[i] < 0) continue;
505 #if defined(PETSC_USE_DEBUG)
506     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
507 #endif
508     if (im[i] >= rstart && im[i] < rend) {
509       row      = im[i] - rstart;
510       lastcol1 = -1;
511       rp1      = aj + ai[row];
512       ap1      = aa + ai[row];
513       rmax1    = aimax[row];
514       nrow1    = ailen[row];
515       low1     = 0;
516       high1    = nrow1;
517       lastcol2 = -1;
518       rp2      = bj + bi[row];
519       ap2      = ba + bi[row];
520       rmax2    = bimax[row];
521       nrow2    = bilen[row];
522       low2     = 0;
523       high2    = nrow2;
524 
525       for (j=0; j<n; j++) {
526         if (v) {
527           if (roworiented) value = v[i*n+j];
528           else             value = v[i+j*m];
529         } else value = 0.0;
530         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
531         if (in[j] >= cstart && in[j] < cend) {
532           col   = in[j] - cstart;
533           nonew = a->nonew;
534           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
535         } else if (in[j] < 0) continue;
536 #if defined(PETSC_USE_DEBUG)
537         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
538 #endif
539         else {
540           if (mat->was_assembled) {
541             if (!aij->colmap) {
542               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
543             }
544 #if defined(PETSC_USE_CTABLE)
545             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
546             col--;
547 #else
548             col = aij->colmap[in[j]] - 1;
549 #endif
550             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
551               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
552               col  =  in[j];
553               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
554               B     = aij->B;
555               b     = (Mat_SeqAIJ*)B->data;
556               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
557               rp2   = bj + bi[row];
558               ap2   = ba + bi[row];
559               rmax2 = bimax[row];
560               nrow2 = bilen[row];
561               low2  = 0;
562               high2 = nrow2;
563               bm    = aij->B->rmap->n;
564               ba    = b->a;
565             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
566           } else col = in[j];
567           nonew = b->nonew;
568           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
569         }
570       }
571     } else {
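      /* this row is owned by another process: stash the values locally; they are
         sent to the owning process during MatAssemblyBegin()/MatAssemblyEnd() */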
572       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
573       if (!aij->donotstash) {
574         mat->assembled = PETSC_FALSE;
575         if (roworiented) {
576           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
577         } else {
578           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
579         }
580       }
581     }
582   }
583   PetscFunctionReturn(0);
584 }
585 
586 #undef __FUNCT__
587 #define __FUNCT__ "MatGetValues_MPIAIJ"
588 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
589 {
590   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
591   PetscErrorCode ierr;
592   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
593   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
594 
595   PetscFunctionBegin;
596   for (i=0; i<m; i++) {
597     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
598     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
599     if (idxm[i] >= rstart && idxm[i] < rend) {
600       row = idxm[i] - rstart;
601       for (j=0; j<n; j++) {
602         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
603         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
604         if (idxn[j] >= cstart && idxn[j] < cend) {
605           col  = idxn[j] - cstart;
606           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
607         } else {
608           if (!aij->colmap) {
609             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
610           }
611 #if defined(PETSC_USE_CTABLE)
612           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
613           col--;
614 #else
615           col = aij->colmap[idxn[j]] - 1;
616 #endif
617           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
618           else {
619             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
620           }
621         }
622       }
623     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
629 
630 #undef __FUNCT__
631 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
632 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
633 {
634   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
635   PetscErrorCode ierr;
636   PetscInt       nstash,reallocs;
637   InsertMode     addv;
638 
639   PetscFunctionBegin;
640   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
641 
642   /* make sure all processors are either in INSERTMODE or ADDMODE */
643   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
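  /* the bitwise-OR reduction forms the union of the insert modes over all
     processes; a mixed union shows up as ADD_VALUES|INSERT_VALUES below */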
644   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
645   mat->insertmode = addv; /* in case this processor had no cache */
646 
647   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
648   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
649   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
656 {
657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
658   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
659   PetscErrorCode ierr;
660   PetscMPIInt    n;
661   PetscInt       i,j,rstart,ncols,flg;
662   PetscInt       *row,*col;
663   PetscBool      other_disassembled;
664   PetscScalar    *val;
665   InsertMode     addv = mat->insertmode;
666 
667   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
668 
669   PetscFunctionBegin;
670   if (!aij->donotstash && !mat->nooffprocentries) {
671     while (1) {
672       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
673       if (!flg) break;
674 
675       for (i=0; i<n; ) {
676         /* Now identify the consecutive vals belonging to the same row */
677         for (j=i,rstart=row[j]; j<n; j++) {
678           if (row[j] != rstart) break;
679         }
680         if (j < n) ncols = j-i;
681         else       ncols = n-i;
682         /* Now assemble all these values with a single function call */
683         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
684 
685         i = j;
686       }
687     }
688     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
689   }
690   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
691   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
692 
693   /* determine if any processor has disassembled, if so we must
694      also disassemble ourselves, in order that we may reassemble. */
695   /*
696      if nonzero structure of submatrix B cannot change then we know that
697      no processor disassembled thus we can skip this stuff
698   */
699   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
700     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
701     if (mat->was_assembled && !other_disassembled) {
702       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
703     }
704   }
705   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
706     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
707   }
708   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
709   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
710   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
711 
712   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
713 
714   aij->rowvalues = 0;
715 
716   /* used by MatAXPY() */
717   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
718   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
719 
720   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
721   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
727 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
728 {
729   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
730   PetscErrorCode ierr;
731 
732   PetscFunctionBegin;
733   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
734   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNCT__
739 #define __FUNCT__ "MatZeroRows_MPIAIJ"
740 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
741 {
742   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
743   PetscInt      *owners = A->rmap->range;
744   PetscInt       n      = A->rmap->n;
745   PetscMPIInt    size   = mat->size;
746   PetscSF        sf;
747   PetscInt      *lrows;
748   PetscSFNode   *rrows;
749   PetscInt       lastidx = -1, r, p = 0, len = 0;
750   PetscErrorCode ierr;
751 
752   PetscFunctionBegin;
753   /* Create SF where leaves are input rows and roots are owned rows */
754   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
755   for (r = 0; r < n; ++r) lrows[r] = -1;
756   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
757   for (r = 0; r < N; ++r) {
758     const PetscInt idx   = rows[r];
759     PetscBool      found = PETSC_FALSE;
760     /* Trick for efficient searching for sorted rows */
761     if (lastidx > idx) p = 0;
762     lastidx = idx;
763     for (; p < size; ++p) {
764       if (idx >= owners[p] && idx < owners[p+1]) {
765         rrows[r].rank  = p;
766         rrows[r].index = rows[r] - owners[p];
767         found = PETSC_TRUE;
768         break;
769       }
770     }
771     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
772   }
773   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
774   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
775   /* Collect flags for rows to be zeroed */
776   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
777   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
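  /* lrows was initialized to -1; rows that no process asked to zero keep that
     value, while any row reached by the reduction becomes nonnegative */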
778   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
779   /* Compress and put in row numbers */
780   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
781   /* fix right hand side if needed */
782   if (x && b) {
783     const PetscScalar *xx;
784     PetscScalar       *bb;
785 
786     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
787     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
788     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
789     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
790     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
791   }
792   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
793   ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
794   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
795     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
796   } else if (diag != 0.0) {
797     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
798     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
799     for (r = 0; r < len; ++r) {
800       const PetscInt row = lrows[r] + A->rmap->rstart;
801       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
802     }
803     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
804     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
805   } else {
806     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
807   }
808   ierr = PetscFree(lrows);CHKERRQ(ierr);
809   PetscFunctionReturn(0);
810 }
811 
812 #undef __FUNCT__
813 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
814 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
815 {
816   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
817   PetscErrorCode    ierr;
818   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
819   PetscInt          i,*owners = A->rmap->range;
820   PetscInt          *sizes,j,idx,nsends;
821   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
822   PetscInt          *rvalues,count,base,slen,*source;
823   PetscInt          *lens,*lrows,*values,m;
824   MPI_Comm          comm;
825   MPI_Request       *send_waits,*recv_waits;
826   MPI_Status        recv_status,*send_status;
827   const PetscScalar *xx;
828   PetscScalar       *bb,*mask;
829   Vec               xmask,lmask;
830   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
831   const PetscInt    *aj, *ii,*ridx;
832   PetscScalar       *aa;
833 #if defined(PETSC_DEBUG)
834   PetscBool found = PETSC_FALSE;
835 #endif
836 
837   PetscFunctionBegin;
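  /* scheme: every process first informs the owners of the rows it wants zeroed
     (message counts, then the row indices), giving each process the local list
     lrows; the diagonal block is handled by MatZeroRowsColumns() on l->A and the
     off-diagonal block by scattering a mask of the zeroed rows to mark which
     columns must be eliminated */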
838   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
839   /*  first count number of contributors to each processor */
840   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
841   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note */
842   j    = 0;
843   for (i=0; i<N; i++) {
844     if (lastidx > (idx = rows[i])) j = 0;
845     lastidx = idx;
846     for (; j<size; j++) {
847       if (idx >= owners[j] && idx < owners[j+1]) {
848         sizes[2*j]++;
849         sizes[2*j+1] = 1;
850         owner[i]      = j;
851 #if defined(PETSC_DEBUG)
852         found = PETSC_TRUE;
853 #endif
854         break;
855       }
856     }
857 #if defined(PETSC_DEBUG)
858     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
859     found = PETSC_FALSE;
860 #endif
861   }
862   nsends = 0;  for (i=0; i<size; i++) nsends += sizes[2*i+1];
863 
864   /* inform other processors of number of messages and max length*/
865   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
866 
867   /* post receives:   */
868   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
869   ierr = PetscMalloc1((nrecvs+1),&recv_waits);CHKERRQ(ierr);
870   for (i=0; i<nrecvs; i++) {
871     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
872   }
873 
874   /* do sends:
875       1) starts[i] gives the starting index in svalues for stuff going to
876          the ith processor
877   */
878   ierr = PetscMalloc1((N+1),&svalues);CHKERRQ(ierr);
879   ierr = PetscMalloc1((nsends+1),&send_waits);CHKERRQ(ierr);
880   ierr = PetscMalloc1((size+1),&starts);CHKERRQ(ierr);
881 
882   starts[0] = 0;
883   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
884   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
885 
886   starts[0] = 0;
887   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
888   count = 0;
889   for (i=0; i<size; i++) {
890     if (sizes[2*i+1]) {
891       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
892     }
893   }
894   ierr = PetscFree(starts);CHKERRQ(ierr);
895 
896   base = owners[rank];
897 
898   /*  wait on receives */
899   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
900   count = nrecvs; slen = 0;
901   while (count) {
902     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
903     /* unpack receives into our local space */
904     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
905 
906     source[imdex] = recv_status.MPI_SOURCE;
907     lens[imdex]   = n;
908     slen         += n;
909     count--;
910   }
911   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
912 
913   /* move the data into the send scatter */
914   ierr  = PetscMalloc1((slen+1),&lrows);CHKERRQ(ierr);
915   count = 0;
916   for (i=0; i<nrecvs; i++) {
917     values = rvalues + i*nmax;
918     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
919   }
920   ierr = PetscFree(rvalues);CHKERRQ(ierr);
921   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
922   ierr = PetscFree(owner);CHKERRQ(ierr);
923   ierr = PetscFree(sizes);CHKERRQ(ierr);
924   /* lrows are the local rows to be zeroed, slen is the number of local rows */
925 
926   /* zero diagonal part of matrix */
927   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
928 
929   /* handle off diagonal part of matrix */
930   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
931   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
932   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
933   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
934   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
935   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
936   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
937   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
938   if (x) {
939     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
941     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
942     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
943   }
944   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
945 
946   /* remove zeroed rows of off diagonal matrix */
947   ii = aij->i;
948   for (i=0; i<slen; i++) {
949     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
950   }
951 
952   /* loop over all elements of off process part of matrix zeroing removed columns*/
953   if (aij->compressedrow.use) {
954     m    = aij->compressedrow.nrows;
955     ii   = aij->compressedrow.i;
956     ridx = aij->compressedrow.rindex;
957     for (i=0; i<m; i++) {
958       n  = ii[i+1] - ii[i];
959       aj = aij->j + ii[i];
960       aa = aij->a + ii[i];
961 
962       for (j=0; j<n; j++) {
963         if (PetscAbsScalar(mask[*aj])) {
964           if (b) bb[*ridx] -= *aa*xx[*aj];
965           *aa = 0.0;
966         }
967         aa++;
968         aj++;
969       }
970       ridx++;
971     }
972   } else { /* do not use compressed row format */
973     m = l->B->rmap->n;
974     for (i=0; i<m; i++) {
975       n  = ii[i+1] - ii[i];
976       aj = aij->j + ii[i];
977       aa = aij->a + ii[i];
978       for (j=0; j<n; j++) {
979         if (PetscAbsScalar(mask[*aj])) {
980           if (b) bb[i] -= *aa*xx[*aj];
981           *aa = 0.0;
982         }
983         aa++;
984         aj++;
985       }
986     }
987   }
988   if (x) {
989     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
990     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
991   }
992   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
993   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
994   ierr = PetscFree(lrows);CHKERRQ(ierr);
995 
996   /* wait on sends */
997   if (nsends) {
998     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
999     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1000     ierr = PetscFree(send_status);CHKERRQ(ierr);
1001   }
1002   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1003   ierr = PetscFree(svalues);CHKERRQ(ierr);
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatMult_MPIAIJ"
1009 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1010 {
1011   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1012   PetscErrorCode ierr;
1013   PetscInt       nt;
1014 
1015   PetscFunctionBegin;
1016   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1017   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
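  /* overlap communication with computation: begin scattering the needed off-process
     entries of xx, multiply by the diagonal block while the scatter proceeds, then
     complete the scatter and add the off-diagonal block's contribution */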
1018   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1019   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1020   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1021   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 #undef __FUNCT__
1026 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1027 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1028 {
1029   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1030   PetscErrorCode ierr;
1031 
1032   PetscFunctionBegin;
1033   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1039 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1040 {
1041   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1042   PetscErrorCode ierr;
1043 
1044   PetscFunctionBegin;
1045   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1046   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1047   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1048   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 #undef __FUNCT__
1053 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1054 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1055 {
1056   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1057   PetscErrorCode ierr;
1058   PetscBool      merged;
1059 
1060   PetscFunctionBegin;
1061   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1062   /* do nondiagonal part */
1063   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1064   if (!merged) {
1065     /* send it on its way */
1066     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1067     /* do local part */
1068     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1069     /* receive remote parts: note this assumes the values are not actually */
1070     /* added in yy until the next line */
1071     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1072   } else {
1073     /* do local part */
1074     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1075     /* send it on its way */
1076     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1077     /* values were actually received in the Begin() but we still need to call this no-op */
1078     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1079   }
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1085 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1086 {
1087   MPI_Comm       comm;
1088   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1089   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1090   IS             Me,Notme;
1091   PetscErrorCode ierr;
1092   PetscInt       M,N,first,last,*notme,i;
1093   PetscMPIInt    size;
1094 
1095   PetscFunctionBegin;
1096   /* Easy test: symmetric diagonal block */
1097   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1098   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1099   if (!*f) PetscFunctionReturn(0);
1100   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1101   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1102   if (size == 1) PetscFunctionReturn(0);
1103 
1104   /* Hard test: off-diagonal block. This takes a call to MatGetSubMatrices(). */
1105   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1106   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1107   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1108   for (i=0; i<first; i++) notme[i] = i;
1109   for (i=last; i<M; i++) notme[i-last+first] = i;
1110   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1111   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1112   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1113   Aoff = Aoffs[0];
1114   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1115   Boff = Boffs[0];
1116   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1117   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1118   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1119   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1120   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1121   ierr = PetscFree(notme);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1127 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1128 {
1129   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1130   PetscErrorCode ierr;
1131 
1132   PetscFunctionBegin;
1133   /* do nondiagonal part */
1134   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1135   /* send it on its way */
1136   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1137   /* do local part */
1138   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1139   /* receive remote parts */
1140   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 /*
1145   This only works correctly for square matrices where the subblock A->A is the
1146    diagonal block
1147 */
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1150 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1151 {
1152   PetscErrorCode ierr;
1153   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1154 
1155   PetscFunctionBegin;
1156   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1157   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1158   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatScale_MPIAIJ"
1164 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1165 {
1166   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1167   PetscErrorCode ierr;
1168 
1169   PetscFunctionBegin;
1170   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1171   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatDestroy_MPIAIJ"
1177 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1178 {
1179   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1180   PetscErrorCode ierr;
1181 
1182   PetscFunctionBegin;
1183 #if defined(PETSC_USE_LOG)
1184   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1185 #endif
1186   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1187   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1188   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1189   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1190 #if defined(PETSC_USE_CTABLE)
1191   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1192 #else
1193   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1194 #endif
1195   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1196   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1197   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1198   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1199   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1200   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1201 
1202   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1203   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1204   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1205   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1206   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1207   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1208   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1209   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1210   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 #undef __FUNCT__
1215 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1216 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1217 {
1218   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1219   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1220   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1221   PetscErrorCode ierr;
1222   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1223   int            fd;
1224   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1225   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1226   PetscScalar    *column_values;
1227   PetscInt       message_count,flowcontrolcount;
1228   FILE           *file;
1229 
1230   PetscFunctionBegin;
1231   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1232   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1233   nz   = A->nz + B->nz;
1234   if (!rank) {
1235     header[0] = MAT_FILE_CLASSID;
1236     header[1] = mat->rmap->N;
1237     header[2] = mat->cmap->N;
1238 
1239     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1240     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1241     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     /* get largest number of rows any processor has */
1243     rlen  = mat->rmap->n;
1244     range = mat->rmap->range;
1245     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1246   } else {
1247     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1248     rlen = mat->rmap->n;
1249   }
1250 
1251   /* load up the local row counts */
1252   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1253   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1254 
1255   /* store the row lengths to the file */
1256   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1257   if (!rank) {
1258     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1259     for (i=1; i<size; i++) {
1260       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1261       rlen = range[i+1] - range[i];
1262       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1263       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1264     }
1265     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1266   } else {
1267     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1268     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1269     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1270   }
1271   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1272 
1273   /* load up the local column indices */
1274   nzmax = nz; /* the 0th process needs space as large as the largest process needs */
1275   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
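  /* merge each row back into ascending global column order: off-diagonal entries
     to the left of the diagonal block, then the diagonal block, then the rest */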
1277   cnt   = 0;
1278   for (i=0; i<mat->rmap->n; i++) {
1279     for (j=B->i[i]; j<B->i[i+1]; j++) {
1280       if ((col = garray[B->j[j]]) > cstart) break;
1281       column_indices[cnt++] = col;
1282     }
1283     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1284     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1285   }
1286   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1287 
1288   /* store the column indices to the file */
1289   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1290   if (!rank) {
1291     MPI_Status status;
1292     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1293     for (i=1; i<size; i++) {
1294       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1295       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1296       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1297       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1298       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1299     }
1300     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1301   } else {
1302     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1303     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1304     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1305     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1306   }
1307   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1308 
1309   /* load up the local column values */
1310   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1311   cnt  = 0;
1312   for (i=0; i<mat->rmap->n; i++) {
1313     for (j=B->i[i]; j<B->i[i+1]; j++) {
1314       if (garray[B->j[j]] > cstart) break;
1315       column_values[cnt++] = B->a[j];
1316     }
1317     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1318     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1319   }
1320   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1321 
1322   /* store the column values to the file */
1323   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1324   if (!rank) {
1325     MPI_Status status;
1326     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1327     for (i=1; i<size; i++) {
1328       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1329       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1330       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1331       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1332       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1333     }
1334     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1335   } else {
1336     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1337     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1338     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1339     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1340   }
1341   ierr = PetscFree(column_values);CHKERRQ(ierr);
1342 
1343   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1344   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1345   PetscFunctionReturn(0);
1346 }
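
/*
   Illustrative usage sketch (not part of this translation unit; the matrix A and
   the file name are hypothetical): saving an MPIAIJ matrix in PETSc binary format,
   which on more than one process is handled by MatView_MPIAIJ_Binary() above.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/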
1347 
1348 #include <petscdraw.h>
1349 #undef __FUNCT__
1350 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1351 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1352 {
1353   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1354   PetscErrorCode    ierr;
1355   PetscMPIInt       rank = aij->rank,size = aij->size;
1356   PetscBool         isdraw,iascii,isbinary;
1357   PetscViewer       sviewer;
1358   PetscViewerFormat format;
1359 
1360   PetscFunctionBegin;
1361   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1362   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1363   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1364   if (iascii) {
1365     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1366     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1367       MatInfo   info;
1368       PetscBool inodes;
1369 
1370       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1371       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1372       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1373       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1374       if (!inodes) {
1375         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1376                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1377       } else {
1378         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1379                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1380       }
1381       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1382       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1383       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1384       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1385       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1386       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1387       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1388       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1389       PetscFunctionReturn(0);
1390     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1391       PetscInt inodecount,inodelimit,*inodes;
1392       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1393       if (inodes) {
1394         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1395       } else {
1396         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1397       }
1398       PetscFunctionReturn(0);
1399     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1400       PetscFunctionReturn(0);
1401     }
1402   } else if (isbinary) {
1403     if (size == 1) {
1404       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1405       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1406     } else {
1407       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1408     }
1409     PetscFunctionReturn(0);
1410   } else if (isdraw) {
1411     PetscDraw draw;
1412     PetscBool isnull;
1413     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1414     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1415   }
1416 
1417   if (size == 1) {
1418     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1419     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1420   } else {
1421     /* assemble the entire matrix onto first processor. */
1422     Mat        A;
1423     Mat_SeqAIJ *Aloc;
1424     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1425     MatScalar  *a;
1426 
1427     if (mat->rmap->N > 1024) {
1428       PetscBool flg = PETSC_FALSE;
1429 
1430       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1431       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1432     }
1433 
1434     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1435     if (!rank) {
1436       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1437     } else {
1438       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1439     }
1440     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1441     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1442     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1443     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1444     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1445 
1446     /* copy over the A part */
1447     Aloc = (Mat_SeqAIJ*)aij->A->data;
1448     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1449     row  = mat->rmap->rstart;
1450     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1451     for (i=0; i<m; i++) {
1452       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1453       row++;
1454       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1455     }
1456     aj = Aloc->j;
1457     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1458 
1459     /* copy over the B part */
1460     Aloc = (Mat_SeqAIJ*)aij->B->data;
1461     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1462     row  = mat->rmap->rstart;
1463     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1464     ct   = cols;
1465     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1466     for (i=0; i<m; i++) {
1467       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1468       row++;
1469       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1470     }
1471     ierr = PetscFree(ct);CHKERRQ(ierr);
1472     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1473     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1474     /*
1475        Everyone has to call MatView() since the graphics waits are
1476        synchronized across all processes that share the PetscDraw object
1477     */
1478     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1479     if (!rank) {
1480       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1481       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1482       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1483       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1484     }
1485     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1486     ierr = MatDestroy(&A);CHKERRQ(ierr);
1487   }
1488   PetscFunctionReturn(0);
1489 }
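
/*
   Illustrative usage sketch (hypothetical matrix A): requesting the per-process
   nonzero statistics printed by the PETSC_VIEWER_ASCII_INFO_DETAIL branch above.

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/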
1490 
1491 #undef __FUNCT__
1492 #define __FUNCT__ "MatView_MPIAIJ"
1493 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1494 {
1495   PetscErrorCode ierr;
1496   PetscBool      iascii,isdraw,issocket,isbinary;
1497 
1498   PetscFunctionBegin;
1499   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1500   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1501   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1502   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1503   if (iascii || isdraw || isbinary || issocket) {
1504     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1505   }
1506   PetscFunctionReturn(0);
1507 }
1508 
1509 #undef __FUNCT__
1510 #define __FUNCT__ "MatSOR_MPIAIJ"
1511 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1512 {
1513   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1514   PetscErrorCode ierr;
1515   Vec            bb1 = 0;
1516   PetscBool      hasop;
1517 
1518   PetscFunctionBegin;
1519   if (flag == SOR_APPLY_UPPER) {
1520     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1521     PetscFunctionReturn(0);
1522   }
1523 
1524   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1525     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1526   }
1527 
1528   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1529     if (flag & SOR_ZERO_INITIAL_GUESS) {
1530       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1531       its--;
1532     }
1533 
1534     while (its--) {
1535       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1536       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1537 
1538       /* update rhs: bb1 = bb - B*x */
1539       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1540       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1541 
1542       /* local sweep */
1543       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1544     }
1545   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1546     if (flag & SOR_ZERO_INITIAL_GUESS) {
1547       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1548       its--;
1549     }
1550     while (its--) {
1551       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1553 
1554       /* update rhs: bb1 = bb - B*x */
1555       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1556       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1557 
1558       /* local sweep */
1559       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1560     }
1561   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1562     if (flag & SOR_ZERO_INITIAL_GUESS) {
1563       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1564       its--;
1565     }
1566     while (its--) {
1567       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1568       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1569 
1570       /* update rhs: bb1 = bb - B*x */
1571       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1572       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1573 
1574       /* local sweep */
1575       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1576     }
1577   } else if (flag & SOR_EISENSTAT) {
1578     Vec xx1;
1579 
1580     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1581     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1582 
1583     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1584     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1585     if (!mat->diag) {
1586       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1587       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1588     }
1589     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1590     if (hasop) {
1591       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1592     } else {
1593       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1594     }
1595     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1596 
1597     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1598 
1599     /* local sweep */
1600     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1601     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1602     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1603   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1604 
1605   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1606   PetscFunctionReturn(0);
1607 }
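
/*
   Illustrative usage sketch (hypothetical A, b, x): one local symmetric SOR sweep
   with a zero initial guess, which takes the SOR_LOCAL_SYMMETRIC_SWEEP branch above.

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/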
1608 
1609 #undef __FUNCT__
1610 #define __FUNCT__ "MatPermute_MPIAIJ"
1611 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1612 {
1613   Mat            aA,aB,Aperm;
1614   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1615   PetscScalar    *aa,*ba;
1616   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1617   PetscSF        rowsf,sf;
1618   IS             parcolp = NULL;
1619   PetscBool      done;
1620   PetscErrorCode ierr;
1621 
1622   PetscFunctionBegin;
1623   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1624   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1625   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1626   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1627 
1628   /* Invert row permutation to find out where my rows should go */
1629   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1630   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1631   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1632   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1633   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1634   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1635 
1636   /* Invert column permutation to find out where my columns should go */
1637   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1638   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1639   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1640   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1641   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1642   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1643   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1644 
1645   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1646   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1647   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1648 
1649   /* Find out where my gcols should go */
1650   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1651   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1652   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1653   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1654   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1655   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1656   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1657   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1658 
1659   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1660   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1661   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1662   for (i=0; i<m; i++) {
1663     PetscInt row = rdest[i],rowner;
1664     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1665     for (j=ai[i]; j<ai[i+1]; j++) {
1666       PetscInt cowner,col = cdest[aj[j]];
1667       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1668       if (rowner == cowner) dnnz[i]++;
1669       else onnz[i]++;
1670     }
1671     for (j=bi[i]; j<bi[i+1]; j++) {
1672       PetscInt cowner,col = gcdest[bj[j]];
1673       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1674       if (rowner == cowner) dnnz[i]++;
1675       else onnz[i]++;
1676     }
1677   }
1678   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1679   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1680   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1681   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1682   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1683 
1684   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1685   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1686   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1687   for (i=0; i<m; i++) {
1688     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1689     PetscInt rowlen;
1690     rowlen = ai[i+1] - ai[i];
1691     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1692     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1693     rowlen = bi[i+1] - bi[i];
1694     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1695     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1696   }
1697   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1698   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1699   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1700   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1701   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1702   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1703   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1704   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1705   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1706   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1707   *B = Aperm;
1708   PetscFunctionReturn(0);
1709 }
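
/*
   Illustrative usage sketch (hypothetical A; rowp and colp are parallel index sets
   in which each process supplies its portion of the row and column permutations):

     IS  rowp,colp;
     Mat B;
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/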
1710 
1711 #undef __FUNCT__
1712 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1713 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1714 {
1715   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1716   Mat            A    = mat->A,B = mat->B;
1717   PetscErrorCode ierr;
1718   PetscReal      isend[5],irecv[5];
1719 
1720   PetscFunctionBegin;
1721   info->block_size = 1.0;
1722   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1723 
1724   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1725   isend[3] = info->memory;  isend[4] = info->mallocs;
1726 
1727   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1728 
1729   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1730   isend[3] += info->memory;  isend[4] += info->mallocs;
1731   if (flag == MAT_LOCAL) {
1732     info->nz_used      = isend[0];
1733     info->nz_allocated = isend[1];
1734     info->nz_unneeded  = isend[2];
1735     info->memory       = isend[3];
1736     info->mallocs      = isend[4];
1737   } else if (flag == MAT_GLOBAL_MAX) {
1738     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1739 
1740     info->nz_used      = irecv[0];
1741     info->nz_allocated = irecv[1];
1742     info->nz_unneeded  = irecv[2];
1743     info->memory       = irecv[3];
1744     info->mallocs      = irecv[4];
1745   } else if (flag == MAT_GLOBAL_SUM) {
1746     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1747 
1748     info->nz_used      = irecv[0];
1749     info->nz_allocated = irecv[1];
1750     info->nz_unneeded  = irecv[2];
1751     info->memory       = irecv[3];
1752     info->mallocs      = irecv[4];
1753   }
1754   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1755   info->fill_ratio_needed = 0;
1756   info->factor_mallocs    = 0;
1757   PetscFunctionReturn(0);
1758 }
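
/*
   Illustrative usage sketch (hypothetical A): summing the nonzero counts over all
   processes, which triggers the MAT_GLOBAL_SUM reduction above.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/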
1759 
1760 #undef __FUNCT__
1761 #define __FUNCT__ "MatSetOption_MPIAIJ"
1762 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1763 {
1764   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1765   PetscErrorCode ierr;
1766 
1767   PetscFunctionBegin;
1768   switch (op) {
1769   case MAT_NEW_NONZERO_LOCATIONS:
1770   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1771   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1772   case MAT_KEEP_NONZERO_PATTERN:
1773   case MAT_NEW_NONZERO_LOCATION_ERR:
1774   case MAT_USE_INODES:
1775   case MAT_IGNORE_ZERO_ENTRIES:
1776     MatCheckPreallocated(A,1);
1777     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1778     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1779     break;
1780   case MAT_ROW_ORIENTED:
1781     a->roworiented = flg;
1782 
1783     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1784     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1785     break;
1786   case MAT_NEW_DIAGONALS:
1787     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1788     break;
1789   case MAT_IGNORE_OFF_PROC_ENTRIES:
1790     a->donotstash = flg;
1791     break;
1792   case MAT_SPD:
1793     A->spd_set = PETSC_TRUE;
1794     A->spd     = flg;
1795     if (flg) {
1796       A->symmetric                  = PETSC_TRUE;
1797       A->structurally_symmetric     = PETSC_TRUE;
1798       A->symmetric_set              = PETSC_TRUE;
1799       A->structurally_symmetric_set = PETSC_TRUE;
1800     }
1801     break;
1802   case MAT_SYMMETRIC:
1803     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1804     break;
1805   case MAT_STRUCTURALLY_SYMMETRIC:
1806     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1807     break;
1808   case MAT_HERMITIAN:
1809     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1810     break;
1811   case MAT_SYMMETRY_ETERNAL:
1812     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1813     break;
1814   default:
1815     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1816   }
1817   PetscFunctionReturn(0);
1818 }
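
/*
   Illustrative usage sketch (hypothetical A): dropping off-process entries during
   assembly, handled by the MAT_IGNORE_OFF_PROC_ENTRIES case above; the option must
   be set before the values are inserted to have any effect.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ...MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd()...
*/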
1819 
1820 #undef __FUNCT__
1821 #define __FUNCT__ "MatGetRow_MPIAIJ"
1822 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1823 {
1824   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1825   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1826   PetscErrorCode ierr;
1827   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1828   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1829   PetscInt       *cmap,*idx_p;
1830 
1831   PetscFunctionBegin;
1832   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1833   mat->getrowactive = PETSC_TRUE;
1834 
1835   if (!mat->rowvalues && (idx || v)) {
1836     /*
1837         allocate enough space to hold information from the longest row.
1838     */
1839     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1840     PetscInt   max = 1,tmp;
1841     for (i=0; i<matin->rmap->n; i++) {
1842       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1843       if (max < tmp) max = tmp;
1844     }
1845     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1846   }
1847 
1848   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1849   lrow = row - rstart;
1850 
1851   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1852   if (!v)   {pvA = 0; pvB = 0;}
1853   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1854   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1855   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1856   nztot = nzA + nzB;
1857 
1858   cmap = mat->garray;
1859   if (v  || idx) {
1860     if (nztot) {
1861       /* Sort by increasing column numbers, assuming A and B already sorted */
1862       PetscInt imark = -1;
1863       if (v) {
1864         *v = v_p = mat->rowvalues;
1865         for (i=0; i<nzB; i++) {
1866           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1867           else break;
1868         }
1869         imark = i;
1870         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1871         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1872       }
1873       if (idx) {
1874         *idx = idx_p = mat->rowindices;
1875         if (imark > -1) {
1876           for (i=0; i<imark; i++) {
1877             idx_p[i] = cmap[cworkB[i]];
1878           }
1879         } else {
1880           for (i=0; i<nzB; i++) {
1881             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1882             else break;
1883           }
1884           imark = i;
1885         }
1886         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1887         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1888       }
1889     } else {
1890       if (idx) *idx = 0;
1891       if (v)   *v   = 0;
1892     }
1893   }
1894   *nz  = nztot;
1895   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1896   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1897   PetscFunctionReturn(0);
1898 }
1899 
1900 #undef __FUNCT__
1901 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1902 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1903 {
1904   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1905 
1906   PetscFunctionBegin;
1907   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1908   aij->getrowactive = PETSC_FALSE;
1909   PetscFunctionReturn(0);
1910 }
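
/*
   Illustrative usage sketch (hypothetical A): visiting the locally owned rows; only
   one row may be active at a time, so each MatGetRow() must be paired with
   MatRestoreRow() before the next row is requested.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ...use ncols, cols[], vals[]...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/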
1911 
1912 #undef __FUNCT__
1913 #define __FUNCT__ "MatNorm_MPIAIJ"
1914 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1915 {
1916   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1917   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1918   PetscErrorCode ierr;
1919   PetscInt       i,j,cstart = mat->cmap->rstart;
1920   PetscReal      sum = 0.0;
1921   MatScalar      *v;
1922 
1923   PetscFunctionBegin;
1924   if (aij->size == 1) {
1925     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1926   } else {
1927     if (type == NORM_FROBENIUS) {
1928       v = amat->a;
1929       for (i=0; i<amat->nz; i++) {
1930         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1931       }
1932       v = bmat->a;
1933       for (i=0; i<bmat->nz; i++) {
1934         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1935       }
1936       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1937       *norm = PetscSqrtReal(*norm);
1938     } else if (type == NORM_1) { /* max column norm */
1939       PetscReal *tmp,*tmp2;
1940       PetscInt  *jj,*garray = aij->garray;
1941       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1942       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1943       *norm = 0.0;
1944       v     = amat->a; jj = amat->j;
1945       for (j=0; j<amat->nz; j++) {
1946         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1947       }
1948       v = bmat->a; jj = bmat->j;
1949       for (j=0; j<bmat->nz; j++) {
1950         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1951       }
1952       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1953       for (j=0; j<mat->cmap->N; j++) {
1954         if (tmp2[j] > *norm) *norm = tmp2[j];
1955       }
1956       ierr = PetscFree(tmp);CHKERRQ(ierr);
1957       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1958     } else if (type == NORM_INFINITY) { /* max row norm */
1959       PetscReal ntemp = 0.0;
1960       for (j=0; j<aij->A->rmap->n; j++) {
1961         v   = amat->a + amat->i[j];
1962         sum = 0.0;
1963         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1964           sum += PetscAbsScalar(*v); v++;
1965         }
1966         v = bmat->a + bmat->i[j];
1967         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1968           sum += PetscAbsScalar(*v); v++;
1969         }
1970         if (sum > ntemp) ntemp = sum;
1971       }
1972       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1973     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1974   }
1975   PetscFunctionReturn(0);
1976 }
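
/*
   Illustrative usage sketch (hypothetical A): the three norms supported above;
   NORM_2 is rejected with PETSC_ERR_SUP.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);        /* largest column sum */
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr); /* largest row sum */
*/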
1977 
1978 #undef __FUNCT__
1979 #define __FUNCT__ "MatTranspose_MPIAIJ"
1980 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1981 {
1982   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1983   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1984   PetscErrorCode ierr;
1985   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1986   PetscInt       cstart = A->cmap->rstart,ncol;
1987   Mat            B;
1988   MatScalar      *array;
1989 
1990   PetscFunctionBegin;
1991   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1992 
1993   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1994   ai = Aloc->i; aj = Aloc->j;
1995   bi = Bloc->i; bj = Bloc->j;
1996   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1997     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1998     PetscSFNode          *oloc;
1999     PETSC_UNUSED PetscSF sf;
2000 
2001     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2002     /* compute d_nnz for preallocation */
2003     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2004     for (i=0; i<ai[ma]; i++) {
2005       d_nnz[aj[i]]++;
2006       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2007     }
2008     /* compute local off-diagonal contributions */
2009     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2010     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2011     /* map those to global */
2012     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2013     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2014     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2015     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2016     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2017     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2018     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2019 
2020     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2021     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2022     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2023     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2024     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2025     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2026   } else {
2027     B    = *matout;
2028     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2029     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2030   }
2031 
2032   /* copy over the A part */
2033   array = Aloc->a;
2034   row   = A->rmap->rstart;
2035   for (i=0; i<ma; i++) {
2036     ncol = ai[i+1]-ai[i];
2037     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2038     row++;
2039     array += ncol; aj += ncol;
2040   }
2041   aj = Aloc->j;
2042   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2043 
2044   /* copy over the B part */
2045   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2046   array = Bloc->a;
2047   row   = A->rmap->rstart;
2048   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2049   cols_tmp = cols;
2050   for (i=0; i<mb; i++) {
2051     ncol = bi[i+1]-bi[i];
2052     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2053     row++;
2054     array += ncol; cols_tmp += ncol;
2055   }
2056   ierr = PetscFree(cols);CHKERRQ(ierr);
2057 
2058   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2059   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2060   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2061     *matout = B;
2062   } else {
2063     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2064   }
2065   PetscFunctionReturn(0);
2066 }
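
/*
   Illustrative usage sketch (hypothetical A): out-of-place and in-place transposes;
   the in-place form requires a square matrix, as checked above.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/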
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2070 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2071 {
2072   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2073   Mat            a    = aij->A,b = aij->B;
2074   PetscErrorCode ierr;
2075   PetscInt       s1,s2,s3;
2076 
2077   PetscFunctionBegin;
2078   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2079   if (rr) {
2080     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2081     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2082     /* Overlap communication with computation. */
2083     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2084   }
2085   if (ll) {
2086     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2087     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2088     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2089   }
2090   /* scale the diagonal block */
2091   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2092 
2093   if (rr) {
2094     /* Do a scatter end and then right scale the off-diagonal block */
2095     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2096     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2097   }
2098   PetscFunctionReturn(0);
2099 }
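
/*
   Illustrative usage sketch (hypothetical A, l, r): forms diag(l)*A*diag(r); l must
   conform to the local rows and r to the local columns, as checked above.

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/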
2100 
2101 #undef __FUNCT__
2102 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2103 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2104 {
2105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2106   PetscErrorCode ierr;
2107 
2108   PetscFunctionBegin;
2109   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2110   PetscFunctionReturn(0);
2111 }
2112 
2113 #undef __FUNCT__
2114 #define __FUNCT__ "MatEqual_MPIAIJ"
2115 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2116 {
2117   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2118   Mat            a,b,c,d;
2119   PetscBool      flg;
2120   PetscErrorCode ierr;
2121 
2122   PetscFunctionBegin;
2123   a = matA->A; b = matA->B;
2124   c = matB->A; d = matB->B;
2125 
2126   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2127   if (flg) {
2128     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2129   }
2130   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatCopy_MPIAIJ"
2136 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2137 {
2138   PetscErrorCode ierr;
2139   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2140   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2141 
2142   PetscFunctionBegin;
2143   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2144   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2145     /* because of the column compression in the off-process part of the matrix a->B,
2146        the number of columns in a->B and b->B may differ, hence we cannot call
2147        MatCopy() directly on the two parts. If need be, we could provide a copy more
2148        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
2149        then copying the submatrices */
2150     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2151   } else {
2152     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2153     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2154   }
2155   PetscFunctionReturn(0);
2156 }
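
/*
   Illustrative usage sketch (hypothetical A): the fast path above is taken only when
   source and destination share the nonzero pattern, e.g. after MatDuplicate().

     Mat B;
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/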
2157 
2158 #undef __FUNCT__
2159 #define __FUNCT__ "MatSetUp_MPIAIJ"
2160 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2161 {
2162   PetscErrorCode ierr;
2163 
2164   PetscFunctionBegin;
2165   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2171 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2172 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2173 {
2174   PetscInt       i,m=Y->rmap->N;
2175   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2176   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2177   const PetscInt *xi = x->i,*yi = y->i;
2178 
2179   PetscFunctionBegin;
2180   /* Set the number of nonzeros in the new matrix */
2181   for (i=0; i<m; i++) {
2182     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2183     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2184     nnz[i] = 0;
2185     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2186       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2187       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2188       nnz[i]++;
2189     }
2190     for (; k<nzy; k++) nnz[i]++;
2191   }
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatAXPY_MPIAIJ"
2197 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2198 {
2199   PetscErrorCode ierr;
2200   PetscInt       i;
2201   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2202   PetscBLASInt   bnz,one=1;
2203   Mat_SeqAIJ     *x,*y;
2204 
2205   PetscFunctionBegin;
2206   if (str == SAME_NONZERO_PATTERN) {
2207     PetscScalar alpha = a;
2208     x    = (Mat_SeqAIJ*)xx->A->data;
2209     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2210     y    = (Mat_SeqAIJ*)yy->A->data;
2211     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2212     x    = (Mat_SeqAIJ*)xx->B->data;
2213     y    = (Mat_SeqAIJ*)yy->B->data;
2214     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2215     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2216   } else if (str == SUBSET_NONZERO_PATTERN) {
2217     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2218 
2219     x = (Mat_SeqAIJ*)xx->B->data;
2220     y = (Mat_SeqAIJ*)yy->B->data;
2221     if (y->xtoy && y->XtoY != xx->B) {
2222       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2223       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2224     }
2225     if (!y->xtoy) { /* get xtoy */
2226       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2227       y->XtoY = xx->B;
2228       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2229     }
2230     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2231   } else {
2232     Mat      B;
2233     PetscInt *nnz_d,*nnz_o;
2234     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2235     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2236     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2237     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2238     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2239     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2240     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2241     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2242     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2243     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2244     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2245     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2246     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2247     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2248   }
2249   PetscFunctionReturn(0);
2250 }
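
/*
   Illustrative usage sketch (hypothetical X, Y): computes Y = Y + 2*X.
   SAME_NONZERO_PATTERN uses the BLASaxpy fast path above, while
   DIFFERENT_NONZERO_PATTERN preallocates a fresh matrix via
   MatAXPYGetPreallocation_MPIAIJ() and replaces Y's header.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/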
2251 
2252 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2253 
2254 #undef __FUNCT__
2255 #define __FUNCT__ "MatConjugate_MPIAIJ"
2256 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2257 {
2258 #if defined(PETSC_USE_COMPLEX)
2259   PetscErrorCode ierr;
2260   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2261 
2262   PetscFunctionBegin;
2263   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2264   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2265 #else
2266   PetscFunctionBegin;
2267 #endif
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatRealPart_MPIAIJ"
2273 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2274 {
2275   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2276   PetscErrorCode ierr;
2277 
2278   PetscFunctionBegin;
2279   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2280   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2281   PetscFunctionReturn(0);
2282 }
2283 
2284 #undef __FUNCT__
2285 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2286 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2287 {
2288   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2289   PetscErrorCode ierr;
2290 
2291   PetscFunctionBegin;
2292   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2293   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2294   PetscFunctionReturn(0);
2295 }
2296 
2297 #if defined(PETSC_HAVE_PBGL)
2298 
2299 #include <boost/parallel/mpi/bsp_process_group.hpp>
2300 #include <boost/graph/distributed/ilu_default_graph.hpp>
2301 #include <boost/graph/distributed/ilu_0_block.hpp>
2302 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2303 #include <boost/graph/distributed/petsc/interface.hpp>
2304 #include <boost/multi_array.hpp>
2305 #include <boost/parallel/distributed_property_map.hpp>
2306 
2307 #undef __FUNCT__
2308 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2309 /*
2310   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2311 */
2312 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2313 {
2314   namespace petsc = boost::distributed::petsc;
2315 
2316   namespace graph_dist = boost::graph::distributed;
2317   using boost::graph::distributed::ilu_default::process_group_type;
2318   using boost::graph::ilu_permuted;
2319 
2320   PetscBool      row_identity, col_identity;
2321   PetscContainer c;
2322   PetscInt       m, n, M, N;
2323   PetscErrorCode ierr;
2324 
2325   PetscFunctionBegin;
2326   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2327   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2328   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2329   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2330 
2331   process_group_type pg;
2332   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2333   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2334   lgraph_type& level_graph = *lgraph_p;
2335   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2336 
2337   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2338   ilu_permuted(level_graph);
2339 
2340   /* put together the new matrix */
2341   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2342   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2343   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2344   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2345   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2346   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2347   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2348   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2349 
2350   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2351   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2352   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2353   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2354   PetscFunctionReturn(0);
2355 }
2356 
2357 #undef __FUNCT__
2358 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2359 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2360 {
2361   PetscFunctionBegin;
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatSolve_MPIAIJ"
2367 /*
2368   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2369 */
2370 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2371 {
2372   namespace graph_dist = boost::graph::distributed;
2373 
2374   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2375   lgraph_type    *lgraph_p;
2376   PetscContainer c;
2377   PetscErrorCode ierr;
2378 
2379   PetscFunctionBegin;
2380   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2381   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2382   ierr = VecCopy(b, x);CHKERRQ(ierr);
2383 
2384   PetscScalar *array_x;
2385   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2386   PetscInt sx;
2387   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2388 
2389   PetscScalar *array_b;
2390   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2391   PetscInt sb;
2392   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2393 
2394   lgraph_type& level_graph = *lgraph_p;
2395   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2396 
2397   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2398   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2399   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2400 
2401   typedef boost::iterator_property_map<array_ref_type::iterator,
2402                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2403   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2404   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2405 
2406   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2407   PetscFunctionReturn(0);
2408 }
2409 #endif
2410 
2411 #undef __FUNCT__
2412 #define __FUNCT__ "MatDestroy_MatRedundant"
2413 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2414 {
2415   PetscErrorCode ierr;
2416   Mat_Redundant  *redund;
2417   PetscInt       i;
2418   PetscMPIInt    size;
2419 
2420   PetscFunctionBegin;
2421   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2422   if (size == 1) {
2423     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2424     redund = a->redundant;
2425   } else {
2426     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2427     redund = a->redundant;
2428   }
2429   if (redund){
2430     if (redund->matseq) { /* via MatGetSubMatrices()  */
2431       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2432       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2433       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2434       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2435     } else {
2436       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2437       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2438       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2439       for (i=0; i<redund->nrecvs; i++) {
2440         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2441         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2442       }
2443       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2444     }
2445 
2446     if (redund->psubcomm) {
2447       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2448     }
2449     ierr = redund->Destroy(A);CHKERRQ(ierr);
2450     ierr = PetscFree(redund);CHKERRQ(ierr);
2451   }
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 #undef __FUNCT__
2456 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2457 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2458 {
2459   PetscMPIInt    rank,size;
2460   MPI_Comm       comm;
2461   PetscErrorCode ierr;
2462   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2463   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2464   PetscInt       *rowrange = mat->rmap->range;
2465   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2466   Mat            A = aij->A,B=aij->B,C=*matredundant;
2467   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2468   PetscScalar    *sbuf_a;
2469   PetscInt       nzlocal=a->nz+b->nz;
2470   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2471   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2472   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2473   MatScalar      *aworkA,*aworkB;
2474   PetscScalar    *vals;
2475   PetscMPIInt    tag1,tag2,tag3,imdex;
2476   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2477   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2478   MPI_Status     recv_status,*send_status;
2479   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2480   PetscInt       **rbuf_j=NULL;
2481   PetscScalar    **rbuf_a=NULL;
2482   Mat_Redundant  *redund =NULL;
2483 
2484   PetscFunctionBegin;
2485   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2486   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2487   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2488   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2489   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2490 
2491   if (reuse == MAT_REUSE_MATRIX) {
2492     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2493     if (subsize == 1) {
2494       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2495       redund = c->redundant;
2496     } else {
2497       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2498       redund = c->redundant;
2499     }
2500     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2501 
2502     nsends    = redund->nsends;
2503     nrecvs    = redund->nrecvs;
2504     send_rank = redund->send_rank;
2505     recv_rank = redund->recv_rank;
2506     sbuf_nz   = redund->sbuf_nz;
2507     rbuf_nz   = redund->rbuf_nz;
2508     sbuf_j    = redund->sbuf_j;
2509     sbuf_a    = redund->sbuf_a;
2510     rbuf_j    = redund->rbuf_j;
2511     rbuf_a    = redund->rbuf_a;
2512   }
2513 
2514   if (reuse == MAT_INITIAL_MATRIX) {
2515     PetscInt    nleftover,np_subcomm;
2516 
2517     /* determine the destination ranks (send_rank) and the counts nsends and nrecvs */
2518     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2519 
2520     np_subcomm = size/nsubcomm;
2521     nleftover  = size - nsubcomm*np_subcomm;
2522 
2523     /* the block of code below is specific to INTERLACED */
2524     /* ------------------------------------------------*/
2525     nsends = 0; nrecvs = 0;
2526     for (i=0; i<size; i++) {
2527       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2528         send_rank[nsends++] = i;
2529         recv_rank[nrecvs++] = i;
2530       }
2531     }
2532     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2533       i = size-nleftover-1;
2534       j = 0;
2535       while (j < nsubcomm - nleftover) {
2536         send_rank[nsends++] = i;
2537         i--; j++;
2538       }
2539     }
2540 
2541     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2542       for (i=0; i<nleftover; i++) {
2543         recv_rank[nrecvs++] = size-nleftover+i;
2544       }
2545     }
2546     /*----------------------------------------------*/
2547 
2548     /* allocate sbuf_j, sbuf_a */
2549     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2550     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2551     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2552     /*
2553     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2554     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2555      */
2556   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2557 
2558   /* copy mat's local entries into the buffers */
2559   if (reuse == MAT_INITIAL_MATRIX) {
2560     rownz_max = 0;
2561     rptr      = sbuf_j;
2562     cols      = sbuf_j + rend-rstart + 1;
2563     vals      = sbuf_a;
2564     rptr[0]   = 0;
2565     for (i=0; i<rend-rstart; i++) {
2566       row    = i + rstart;
2567       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2568       ncols  = nzA + nzB;
2569       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2570       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2571       /* load the column indices for this row into cols */
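           /* the three loops below emit each row's global columns in ascending order:
              off-diagonal entries left of the diagonal block, the diagonal block itself,
              then off-diagonal entries to its right */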
2572       lwrite = 0;
2573       for (l=0; l<nzB; l++) {
2574         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2575           vals[lwrite]   = aworkB[l];
2576           cols[lwrite++] = ctmp;
2577         }
2578       }
2579       for (l=0; l<nzA; l++) {
2580         vals[lwrite]   = aworkA[l];
2581         cols[lwrite++] = cstart + cworkA[l];
2582       }
2583       for (l=0; l<nzB; l++) {
2584         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2585           vals[lwrite]   = aworkB[l];
2586           cols[lwrite++] = ctmp;
2587         }
2588       }
2589       vals     += ncols;
2590       cols     += ncols;
2591       rptr[i+1] = rptr[i] + ncols;
2592       if (rownz_max < ncols) rownz_max = ncols;
2593     }
2594     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2595   } else { /* only copy matrix values into sbuf_a */
2596     rptr    = sbuf_j;
2597     vals    = sbuf_a;
2598     rptr[0] = 0;
2599     for (i=0; i<rend-rstart; i++) {
2600       row    = i + rstart;
2601       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2602       ncols  = nzA + nzB;
2603       cworkB = b->j + b->i[i];
2604       aworkA = a->a + a->i[i];
2605       aworkB = b->a + b->i[i];
2606       lwrite = 0;
2607       for (l=0; l<nzB; l++) {
2608         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2609       }
2610       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2611       for (l=0; l<nzB; l++) {
2612         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2613       }
2614       vals     += ncols;
2615       rptr[i+1] = rptr[i] + ncols;
2616     }
2617   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2618 
2619   /* send nzlocal to others, and recv others' nzlocal */
2620   /*--------------------------------------------------*/
2621   if (reuse == MAT_INITIAL_MATRIX) {
2622     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2623 
2624     s_waits2 = s_waits3 + nsends;
2625     s_waits1 = s_waits2 + nsends;
2626     r_waits1 = s_waits1 + nsends;
2627     r_waits2 = r_waits1 + nrecvs;
2628     r_waits3 = r_waits2 + nrecvs;
2629   } else {
2630     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2631 
2632     r_waits3 = s_waits3 + nsends;
2633   }
2634 
2635   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2636   if (reuse == MAT_INITIAL_MATRIX) {
2637     /* get new tags to keep the communication clean */
2638     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2639     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2640     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2641 
2642     /* post receives of others' nzlocal */
2643     for (i=0; i<nrecvs; i++) {
2644       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2645     }
2646     /* send nzlocal to others */
2647     for (i=0; i<nsends; i++) {
2648       sbuf_nz[i] = nzlocal;
2649       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2650     }
2651     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2652     count = nrecvs;
2653     while (count) {
2654       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2655 
2656       recv_rank[imdex] = recv_status.MPI_SOURCE;
2657       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2658       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2659 
2660       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2661 
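           /* rbuf_j must hold the sender's row pointers (one more than its row count)
              plus its nzlocal column indices, so grow the expected length accordingly,
              with one spare slot */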
2662       rbuf_nz[imdex] += i + 2;
2663 
2664       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2665       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2666       count--;
2667     }
2668     /* wait on sends of nzlocal */
2669     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2670     /* send mat->i,j to others, and recv from others */
2671     /*------------------------------------------------*/
2672     for (i=0; i<nsends; i++) {
2673       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2674       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2675     }
2676     /* wait on receives of mat->i,j */
2677     /*------------------------------*/
2678     count = nrecvs;
2679     while (count) {
2680       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2681       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2682       count--;
2683     }
2684     /* wait on sends of mat->i,j */
2685     /*---------------------------*/
2686     if (nsends) {
2687       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2688     }
2689   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2690 
2691   /* post receives, send and receive mat->a */
2692   /*----------------------------------------*/
2693   for (imdex=0; imdex<nrecvs; imdex++) {
2694     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2695   }
2696   for (i=0; i<nsends; i++) {
2697     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2698   }
2699   count = nrecvs;
2700   while (count) {
2701     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2702     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2703     count--;
2704   }
2705   if (nsends) {
2706     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2707   }
2708 
2709   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2710 
2711   /* create redundant matrix */
2712   /*-------------------------*/
2713   if (reuse == MAT_INITIAL_MATRIX) {
2714     const PetscInt *range;
2715     PetscInt       rstart_sub,rend_sub,mloc_sub;
2716 
2717     /* compute rownz_max for preallocation */
2718     for (imdex=0; imdex<nrecvs; imdex++) {
2719       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2720       rptr = rbuf_j[imdex];
2721       for (i=0; i<j; i++) {
2722         ncols = rptr[i+1] - rptr[i];
2723         if (rownz_max < ncols) rownz_max = ncols;
2724       }
2725     }
2726 
2727     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2728 
2729     /* get local size of redundant matrix
2730        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2731     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2732     rstart_sub = range[nsubcomm*subrank];
2733     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2734       rend_sub = range[nsubcomm*(subrank+1)];
2735     } else {
2736       rend_sub = mat->rmap->N;
2737     }
2738     mloc_sub = rend_sub - rstart_sub;
2739 
2740     if (M == N) {
2741       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2742     } else { /* non-square matrix */
2743       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2744     }
2745     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2746     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2747     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2748     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2749   } else {
2750     C = *matredundant;
2751   }
2752 
2753   /* insert local matrix entries */
2754   rptr = sbuf_j;
2755   cols = sbuf_j + rend-rstart + 1;
2756   vals = sbuf_a;
2757   for (i=0; i<rend-rstart; i++) {
2758     row   = i + rstart;
2759     ncols = rptr[i+1] - rptr[i];
2760     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2761     vals += ncols;
2762     cols += ncols;
2763   }
2764   /* insert received matrix entries */
2765   for (imdex=0; imdex<nrecvs; imdex++) {
2766     rstart = rowrange[recv_rank[imdex]];
2767     rend   = rowrange[recv_rank[imdex]+1];
2768     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2769     rptr   = rbuf_j[imdex];
2770     cols   = rbuf_j[imdex] + rend-rstart + 1;
2771     vals   = rbuf_a[imdex];
2772     for (i=0; i<rend-rstart; i++) {
2773       row   = i + rstart;
2774       ncols = rptr[i+1] - rptr[i];
2775       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2776       vals += ncols;
2777       cols += ncols;
2778     }
2779   }
2780   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2781   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2782 
2783   if (reuse == MAT_INITIAL_MATRIX) {
2784     *matredundant = C;
2785 
2786     /* create a supporting struct and attach it to C for reuse */
2787     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2788     if (subsize == 1) {
2789       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2790       c->redundant = redund;
2791     } else {
2792       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2793       c->redundant = redund;
2794     }
2795 
2796     redund->nzlocal   = nzlocal;
2797     redund->nsends    = nsends;
2798     redund->nrecvs    = nrecvs;
2799     redund->send_rank = send_rank;
2800     redund->recv_rank = recv_rank;
2801     redund->sbuf_nz   = sbuf_nz;
2802     redund->rbuf_nz   = rbuf_nz;
2803     redund->sbuf_j    = sbuf_j;
2804     redund->sbuf_a    = sbuf_a;
2805     redund->rbuf_j    = rbuf_j;
2806     redund->rbuf_a    = rbuf_a;
2807     redund->psubcomm  = NULL;
2808 
2809     redund->Destroy = C->ops->destroy;
2810     C->ops->destroy = MatDestroy_MatRedundant;
2811   }
2812   PetscFunctionReturn(0);
2813 }
2814 
2815 #undef __FUNCT__
2816 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2817 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2818 {
2819   PetscErrorCode ierr;
2820   MPI_Comm       comm;
2821   PetscMPIInt    size,subsize;
2822   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2823   Mat_Redundant  *redund=NULL;
2824   PetscSubcomm   psubcomm=NULL;
2825   MPI_Comm       subcomm_in=subcomm;
2826   Mat            *matseq;
2827   IS             isrow,iscol;
2828 
2829   PetscFunctionBegin;
2830   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2831     if (reuse ==  MAT_INITIAL_MATRIX) {
2832       /* create psubcomm, then get subcomm */
2833       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2834       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2835       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2836 
2837       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2838       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2839       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2840       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2841       subcomm = psubcomm->comm;
2842     } else { /* retrieve psubcomm and subcomm */
2843       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2844       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2845       if (subsize == 1) {
2846         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2847         redund = c->redundant;
2848       } else {
2849         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2850         redund = c->redundant;
2851       }
2852       psubcomm = redund->psubcomm;
2853     }
2854     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2855       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2856       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2857         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2858         if (subsize == 1) {
2859           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2860           c->redundant->psubcomm = psubcomm;
2861         } else {
2862           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2863           c->redundant->psubcomm = psubcomm;
2864         }
2865       }
2866       PetscFunctionReturn(0);
2867     }
2868   }
2869 
2870   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2871   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2872   if (reuse == MAT_INITIAL_MATRIX) {
2873     /* create a local sequential matrix matseq[0] */
2874     mloc_sub = PETSC_DECIDE;
2875     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2876     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2877     rstart = rend - mloc_sub;
2878     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2879     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2880   } else { /* reuse == MAT_REUSE_MATRIX */
2881     if (subsize == 1) {
2882       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2883       redund = c->redundant;
2884     } else {
2885       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2886       redund = c->redundant;
2887     }
2888 
2889     isrow  = redund->isrow;
2890     iscol  = redund->iscol;
2891     matseq = redund->matseq;
2892   }
2893   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2894   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2895 
2896   if (reuse == MAT_INITIAL_MATRIX) {
2897     /* create a supporting struct and attach it to C for reuse */
2898     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2899     if (subsize == 1) {
2900       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2901       c->redundant = redund;
2902     } else {
2903       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2904       c->redundant = redund;
2905     }
2906     redund->isrow    = isrow;
2907     redund->iscol    = iscol;
2908     redund->matseq   = matseq;
2909     redund->psubcomm = psubcomm;
2910     redund->Destroy               = (*matredundant)->ops->destroy;
2911     (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2912   }
2913   PetscFunctionReturn(0);
2914 }
2915 
2916 #undef __FUNCT__
2917 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2918 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2919 {
2920   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2921   PetscErrorCode ierr;
2922   PetscInt       i,*idxb = 0;
2923   PetscScalar    *va,*vb;
2924   Vec            vtmp;
2925 
2926   PetscFunctionBegin;
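       /* take the row-wise max over the diagonal block a->A and the off-diagonal block a->B
          separately, then merge; a->garray maps B's compressed column indices to global ones */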
2927   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2928   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2929   if (idx) {
2930     for (i=0; i<A->rmap->n; i++) {
2931       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2932     }
2933   }
2934 
2935   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2936   if (idx) {
2937     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2938   }
2939   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2940   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2941 
2942   for (i=0; i<A->rmap->n; i++) {
2943     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2944       va[i] = vb[i];
2945       if (idx) idx[i] = a->garray[idxb[i]];
2946     }
2947   }
2948 
2949   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2950   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2951   ierr = PetscFree(idxb);CHKERRQ(ierr);
2952   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2953   PetscFunctionReturn(0);
2954 }
2955 
2956 #undef __FUNCT__
2957 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2958 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2959 {
2960   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2961   PetscErrorCode ierr;
2962   PetscInt       i,*idxb = 0;
2963   PetscScalar    *va,*vb;
2964   Vec            vtmp;
2965 
2966   PetscFunctionBegin;
2967   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2968   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2969   if (idx) {
2970     for (i=0; i<A->rmap->n; i++) {
2971       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2972     }
2973   }
2974 
2975   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2976   if (idx) {
2977     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2978   }
2979   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2980   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2981 
2982   for (i=0; i<A->rmap->n; i++) {
2983     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2984       va[i] = vb[i];
2985       if (idx) idx[i] = a->garray[idxb[i]];
2986     }
2987   }
2988 
2989   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2990   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2991   ierr = PetscFree(idxb);CHKERRQ(ierr);
2992   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2993   PetscFunctionReturn(0);
2994 }
2995 
2996 #undef __FUNCT__
2997 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2998 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2999 {
3000   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3001   PetscInt       n      = A->rmap->n;
3002   PetscInt       cstart = A->cmap->rstart;
3003   PetscInt       *cmap  = mat->garray;
3004   PetscInt       *diagIdx, *offdiagIdx;
3005   Vec            diagV, offdiagV;
3006   PetscScalar    *a, *diagA, *offdiagA;
3007   PetscInt       r;
3008   PetscErrorCode ierr;
3009 
3010   PetscFunctionBegin;
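       /* mirror of MatGetRowMax_MPIAIJ below: take the row-wise min over the diagonal and
          off-diagonal blocks, then merge, with cmap (garray) giving global column indices */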
3011   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
3012   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3013   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3014   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3015   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3016   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3017   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3018   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3019   for (r = 0; r < n; ++r) {
3020     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3021       a[r]   = diagA[r];
3022       idx[r] = cstart + diagIdx[r];
3023     } else {
3024       a[r]   = offdiagA[r];
3025       idx[r] = cmap[offdiagIdx[r]];
3026     }
3027   }
3028   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3029   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3030   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3031   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3032   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3033   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3034   PetscFunctionReturn(0);
3035 }
3036 
3037 #undef __FUNCT__
3038 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3039 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3040 {
3041   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3042   PetscInt       n      = A->rmap->n;
3043   PetscInt       cstart = A->cmap->rstart;
3044   PetscInt       *cmap  = mat->garray;
3045   PetscInt       *diagIdx, *offdiagIdx;
3046   Vec            diagV, offdiagV;
3047   PetscScalar    *a, *diagA, *offdiagA;
3048   PetscInt       r;
3049   PetscErrorCode ierr;
3050 
3051   PetscFunctionBegin;
3052   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
3053   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3054   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3055   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3056   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3057   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3058   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3059   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3060   for (r = 0; r < n; ++r) {
3061     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3062       a[r]   = diagA[r];
3063       idx[r] = cstart + diagIdx[r];
3064     } else {
3065       a[r]   = offdiagA[r];
3066       idx[r] = cmap[offdiagIdx[r]];
3067     }
3068   }
3069   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3070   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3071   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3072   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3073   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3074   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3075   PetscFunctionReturn(0);
3076 }
3077 
3078 #undef __FUNCT__
3079 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3080 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3081 {
3082   PetscErrorCode ierr;
3083   Mat            *dummy;
3084 
3085   PetscFunctionBegin;
3086   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3087   *newmat = *dummy;
3088   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3089   PetscFunctionReturn(0);
3090 }
3091 
3092 #undef __FUNCT__
3093 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3094 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3095 {
3096   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3097   PetscErrorCode ierr;
3098 
3099   PetscFunctionBegin;
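       /* the (point-)block diagonal lies entirely within the diagonal block a->A, so delegate */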
3100   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3101   PetscFunctionReturn(0);
3102 }
3103 
3104 #undef __FUNCT__
3105 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3106 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3107 {
3108   PetscErrorCode ierr;
3109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3110 
3111   PetscFunctionBegin;
3112   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3113   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3114   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3115   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3116   PetscFunctionReturn(0);
3117 }
3118 
3119 /* -------------------------------------------------------------------*/
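     /* function table for MATMPIAIJ: slot k implements the k-th MatOperation (see the
        numbered markers below); a 0 entry means the operation is not provided by this type */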
3120 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3121                                        MatGetRow_MPIAIJ,
3122                                        MatRestoreRow_MPIAIJ,
3123                                        MatMult_MPIAIJ,
3124                                 /* 4*/ MatMultAdd_MPIAIJ,
3125                                        MatMultTranspose_MPIAIJ,
3126                                        MatMultTransposeAdd_MPIAIJ,
3127 #if defined(PETSC_HAVE_PBGL)
3128                                        MatSolve_MPIAIJ,
3129 #else
3130                                        0,
3131 #endif
3132                                        0,
3133                                        0,
3134                                 /*10*/ 0,
3135                                        0,
3136                                        0,
3137                                        MatSOR_MPIAIJ,
3138                                        MatTranspose_MPIAIJ,
3139                                 /*15*/ MatGetInfo_MPIAIJ,
3140                                        MatEqual_MPIAIJ,
3141                                        MatGetDiagonal_MPIAIJ,
3142                                        MatDiagonalScale_MPIAIJ,
3143                                        MatNorm_MPIAIJ,
3144                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3145                                        MatAssemblyEnd_MPIAIJ,
3146                                        MatSetOption_MPIAIJ,
3147                                        MatZeroEntries_MPIAIJ,
3148                                 /*24*/ MatZeroRows_MPIAIJ,
3149                                        0,
3150 #if defined(PETSC_HAVE_PBGL)
3151                                        0,
3152 #else
3153                                        0,
3154 #endif
3155                                        0,
3156                                        0,
3157                                 /*29*/ MatSetUp_MPIAIJ,
3158 #if defined(PETSC_HAVE_PBGL)
3159                                        0,
3160 #else
3161                                        0,
3162 #endif
3163                                        0,
3164                                        0,
3165                                        0,
3166                                 /*34*/ MatDuplicate_MPIAIJ,
3167                                        0,
3168                                        0,
3169                                        0,
3170                                        0,
3171                                 /*39*/ MatAXPY_MPIAIJ,
3172                                        MatGetSubMatrices_MPIAIJ,
3173                                        MatIncreaseOverlap_MPIAIJ,
3174                                        MatGetValues_MPIAIJ,
3175                                        MatCopy_MPIAIJ,
3176                                 /*44*/ MatGetRowMax_MPIAIJ,
3177                                        MatScale_MPIAIJ,
3178                                        0,
3179                                        0,
3180                                        MatZeroRowsColumns_MPIAIJ,
3181                                 /*49*/ MatSetRandom_MPIAIJ,
3182                                        0,
3183                                        0,
3184                                        0,
3185                                        0,
3186                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3187                                        0,
3188                                        MatSetUnfactored_MPIAIJ,
3189                                        MatPermute_MPIAIJ,
3190                                        0,
3191                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3192                                        MatDestroy_MPIAIJ,
3193                                        MatView_MPIAIJ,
3194                                        0,
3195                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3196                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3197                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3198                                        0,
3199                                        0,
3200                                        0,
3201                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3202                                        MatGetRowMinAbs_MPIAIJ,
3203                                        0,
3204                                        MatSetColoring_MPIAIJ,
3205                                        0,
3206                                        MatSetValuesAdifor_MPIAIJ,
3207                                 /*75*/ MatFDColoringApply_AIJ,
3208                                        0,
3209                                        0,
3210                                        0,
3211                                        MatFindZeroDiagonals_MPIAIJ,
3212                                 /*80*/ 0,
3213                                        0,
3214                                        0,
3215                                 /*83*/ MatLoad_MPIAIJ,
3216                                        0,
3217                                        0,
3218                                        0,
3219                                        0,
3220                                        0,
3221                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3222                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3223                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3224                                        MatPtAP_MPIAIJ_MPIAIJ,
3225                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3226                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3227                                        0,
3228                                        0,
3229                                        0,
3230                                        0,
3231                                 /*99*/ 0,
3232                                        0,
3233                                        0,
3234                                        MatConjugate_MPIAIJ,
3235                                        0,
3236                                 /*104*/MatSetValuesRow_MPIAIJ,
3237                                        MatRealPart_MPIAIJ,
3238                                        MatImaginaryPart_MPIAIJ,
3239                                        0,
3240                                        0,
3241                                 /*109*/0,
3242                                        MatGetRedundantMatrix_MPIAIJ,
3243                                        MatGetRowMin_MPIAIJ,
3244                                        0,
3245                                        0,
3246                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3247                                        0,
3248                                        0,
3249                                        0,
3250                                        0,
3251                                 /*119*/0,
3252                                        0,
3253                                        0,
3254                                        0,
3255                                        MatGetMultiProcBlock_MPIAIJ,
3256                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3257                                        MatGetColumnNorms_MPIAIJ,
3258                                        MatInvertBlockDiagonal_MPIAIJ,
3259                                        0,
3260                                        MatGetSubMatricesParallel_MPIAIJ,
3261                                 /*129*/0,
3262                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3263                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3264                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3265                                        0,
3266                                 /*134*/0,
3267                                        0,
3268                                        0,
3269                                        0,
3270                                        0,
3271                                 /*139*/0,
3272                                        0,
3273                                        0,
3274                                        MatFDColoringSetUp_MPIXAIJ
3275 };
3276 
3277 /* ----------------------------------------------------------------------------------------*/
3278 
3279 #undef __FUNCT__
3280 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3281 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3282 {
3283   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3284   PetscErrorCode ierr;
3285 
3286   PetscFunctionBegin;
3287   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3288   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3289   PetscFunctionReturn(0);
3290 }
3291 
3292 #undef __FUNCT__
3293 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3294 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3295 {
3296   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3297   PetscErrorCode ierr;
3298 
3299   PetscFunctionBegin;
3300   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3301   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3302   PetscFunctionReturn(0);
3303 }
3304 
3305 #undef __FUNCT__
3306 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3307 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3308 {
3309   Mat_MPIAIJ     *b;
3310   PetscErrorCode ierr;
3311 
3312   PetscFunctionBegin;
3313   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3314   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3315   b = (Mat_MPIAIJ*)B->data;
3316 
3317   if (!B->preallocated) {
3318     /* Explicitly create 2 MATSEQAIJ matrices. */
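         /* b->A is the diagonal block (local rows x local columns); b->B is the off-diagonal
            block, created with the full global column width and compacted during assembly
            to the columns actually used */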
3319     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3320     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3321     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3322     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3323     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3324     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3325     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3326     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3327     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3328     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3329   }
3330 
3331   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3332   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3333   B->preallocated = PETSC_TRUE;
3334   PetscFunctionReturn(0);
3335 }
3336 
3337 #undef __FUNCT__
3338 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3339 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3340 {
3341   Mat            mat;
3342   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3343   PetscErrorCode ierr;
3344 
3345   PetscFunctionBegin;
3346   *newmat = 0;
3347   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3348   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3349   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3350   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3351   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3352   a       = (Mat_MPIAIJ*)mat->data;
3353 
3354   mat->factortype   = matin->factortype;
3355   mat->rmap->bs     = matin->rmap->bs;
3356   mat->cmap->bs     = matin->cmap->bs;
3357   mat->assembled    = PETSC_TRUE;
3358   mat->insertmode   = NOT_SET_VALUES;
3359   mat->preallocated = PETSC_TRUE;
3360 
3361   a->size         = oldmat->size;
3362   a->rank         = oldmat->rank;
3363   a->donotstash   = oldmat->donotstash;
3364   a->roworiented  = oldmat->roworiented;
3365   a->rowindices   = 0;
3366   a->rowvalues    = 0;
3367   a->getrowactive = PETSC_FALSE;
3368 
3369   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3370   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3371 
3372   if (oldmat->colmap) {
3373 #if defined(PETSC_USE_CTABLE)
3374     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3375 #else
3376     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3377     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3378     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3379 #endif
3380   } else a->colmap = 0;
3381   if (oldmat->garray) {
3382     PetscInt len;
3383     len  = oldmat->B->cmap->n;
3384     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3385     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3386     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3387   } else a->garray = 0;
3388 
3389   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3390   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3391   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3392   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3393   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3394   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3395   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3396   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3397   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3398   *newmat = mat;
3399   PetscFunctionReturn(0);
3400 }
3401 
3402 
3403 
3404 #undef __FUNCT__
3405 #define __FUNCT__ "MatLoad_MPIAIJ"
3406 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3407 {
3408   PetscScalar    *vals,*svals;
3409   MPI_Comm       comm;
3410   PetscErrorCode ierr;
3411   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3412   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3413   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3414   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3415   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3416   int            fd;
3417   PetscInt       bs = 1;
3418 
3419   PetscFunctionBegin;
3420   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3421   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3422   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3423   if (!rank) {
3424     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3425     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3426     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3427   }
3428 
3429   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3430   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3431   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3432 
3433   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3434 
3435   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3436   M    = header[1]; N = header[2];
3437   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3438   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3439   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3440 
3441   /* If global sizes are set, check if they are consistent with that given in the file */
3442   if (sizesset) {
3443     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3444   }
3445   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3446   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3447 
3448   /* determine ownership of all (block) rows */
3449   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
3450   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3451   else m = newMat->rmap->n; /* Set by user */
3452 
3453   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3454   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3455 
3456   /* First process needs enough room for process with most rows */
3457   if (!rank) {
3458     mmax = rowners[1];
3459     for (i=2; i<=size; i++) {
3460       mmax = PetscMax(mmax, rowners[i]);
3461     }
3462   } else mmax = -1;             /* unused, but compilers complain */
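       /* turn the gathered local row counts rowners[1..size] into a prefix sum so that
          rowners[p] is the first global row owned by process p */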
3463 
3464   rowners[0] = 0;
3465   for (i=2; i<=size; i++) {
3466     rowners[i] += rowners[i-1];
3467   }
3468   rstart = rowners[rank];
3469   rend   = rowners[rank+1];
3470 
3471   /* distribute row lengths to all processors */
3472   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3473   if (!rank) {
3474     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3475     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3476     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3477     for (j=0; j<m; j++) {
3478       procsnz[0] += ourlens[j];
3479     }
3480     for (i=1; i<size; i++) {
3481       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3482       /* calculate the number of nonzeros on each processor */
3483       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3484         procsnz[i] += rowlengths[j];
3485       }
3486       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3487     }
3488     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3489   } else {
3490     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3491   }
3492 
3493   if (!rank) {
3494     /* determine max buffer needed and allocate it */
3495     maxnz = 0;
3496     for (i=0; i<size; i++) {
3497       maxnz = PetscMax(maxnz,procsnz[i]);
3498     }
3499     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3500 
3501     /* read in my part of the matrix column indices  */
3502     nz   = procsnz[0];
3503     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3504     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3505 
3506     /* read in everyone else's part and ship it off */
3507     for (i=1; i<size; i++) {
3508       nz   = procsnz[i];
3509       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3510       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3511     }
3512     ierr = PetscFree(cols);CHKERRQ(ierr);
3513   } else {
3514     /* determine buffer space needed for message */
3515     nz = 0;
3516     for (i=0; i<m; i++) {
3517       nz += ourlens[i];
3518     }
3519     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3520 
3521     /* receive message of column indices */
3522     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3523   }
3524 
3525   /* determine column ownership if matrix is not square */
3526   if (N != M) {
3527     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3528     else n = newMat->cmap->n;
3529     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3530     cstart = cend - n;
3531   } else {
3532     cstart = rstart;
3533     cend   = rend;
3534     n      = cend - cstart;
3535   }
3536 
3537   /* loop over local rows, determining number of off diagonal entries */
3538   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3539   jj   = 0;
3540   for (i=0; i<m; i++) {
3541     for (j=0; j<ourlens[i]; j++) {
3542       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3543       jj++;
3544     }
3545   }
3546 
3547   for (i=0; i<m; i++) {
3548     ourlens[i] -= offlens[i];
3549   }
3550   if (!sizesset) {
3551     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3552   }
3553 
3554   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3555 
3556   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3557 
3558   for (i=0; i<m; i++) {
3559     ourlens[i] += offlens[i];
3560   }
3561 
3562   if (!rank) {
3563     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3564 
3565     /* read in my part of the matrix numerical values  */
3566     nz   = procsnz[0];
3567     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3568 
3569     /* insert into matrix */
3570     jj      = rstart;
3571     smycols = mycols;
3572     svals   = vals;
3573     for (i=0; i<m; i++) {
3574       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3575       smycols += ourlens[i];
3576       svals   += ourlens[i];
3577       jj++;
3578     }
3579 
3580     /* read in other processors and ship out */
3581     for (i=1; i<size; i++) {
3582       nz   = procsnz[i];
3583       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3584       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3585     }
3586     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3587   } else {
3588     /* receive numeric values */
3589     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3590 
3591     /* receive message of values */
3592     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3593 
3594     /* insert into matrix */
3595     jj      = rstart;
3596     smycols = mycols;
3597     svals   = vals;
3598     for (i=0; i<m; i++) {
3599       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3600       smycols += ourlens[i];
3601       svals   += ourlens[i];
3602       jj++;
3603     }
3604   }
3605   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3606   ierr = PetscFree(vals);CHKERRQ(ierr);
3607   ierr = PetscFree(mycols);CHKERRQ(ierr);
3608   ierr = PetscFree(rowners);CHKERRQ(ierr);
3609   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3610   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3611   PetscFunctionReturn(0);
3612 }
3613 
3614 #undef __FUNCT__
3615 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3616 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3617 {
3618   PetscErrorCode ierr;
3619   IS             iscol_local;
3620   PetscInt       csize;
3621 
3622   PetscFunctionBegin;
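       /* iscol is gathered into a sequential IS holding all requested columns; on
          MAT_INITIAL_MATRIX it is stashed on the new matrix under "ISAllGather" so a
          later MAT_REUSE_MATRIX call can retrieve it */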
3623   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3624   if (call == MAT_REUSE_MATRIX) {
3625     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3626     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3627   } else {
3628     PetscInt cbs;
3629     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3630     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3631     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3632   }
3633   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3634   if (call == MAT_INITIAL_MATRIX) {
3635     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3636     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3637   }
3638   PetscFunctionReturn(0);
3639 }
3640 
3641 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3642 #undef __FUNCT__
3643 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3644 /*
3645     Not great since it makes two copies of the submatrix: first a SeqAIJ
3646   locally, and then the final result by concatenating the local matrices.
3647   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3648 
3649   Note: This requires a sequential iscol with all indices.
3650 */
3651 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3652 {
3653   PetscErrorCode ierr;
3654   PetscMPIInt    rank,size;
3655   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3656   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3657   PetscBool      allcolumns, colflag;
3658   Mat            M,Mreuse;
3659   MatScalar      *vwork,*aa;
3660   MPI_Comm       comm;
3661   Mat_SeqAIJ     *aij;
3662 
3663   PetscFunctionBegin;
3664   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3665   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3666   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3667 
3668   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3669   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3670   if (colflag && ncol == mat->cmap->N) {
3671     allcolumns = PETSC_TRUE;
3672   } else {
3673     allcolumns = PETSC_FALSE;
3674   }
3675   if (call ==  MAT_REUSE_MATRIX) {
3676     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3677     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3678     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3679   } else {
3680     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3681   }
3682 
3683   /*
3684       m - number of local rows
3685       n - number of columns (same on all processors)
3686       rstart - first row in new global matrix generated
3687   */
3688   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3689   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3690   if (call == MAT_INITIAL_MATRIX) {
3691     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3692     ii  = aij->i;
3693     jj  = aij->j;
3694 
3695     /*
3696         Determine the number of non-zeros in the diagonal and off-diagonal
3697         portions of the matrix in order to do correct preallocation
3698     */
3699 
3700     /* first get start and end of "diagonal" columns */
3701     if (csize == PETSC_DECIDE) {
3702       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3703       if (mglobal == n) { /* square matrix */
3704         nlocal = m;
3705       } else {
3706         nlocal = n/size + ((n % size) > rank);
3707       }
3708     } else {
3709       nlocal = csize;
3710     }
3711     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3712     rstart = rend - nlocal;
3713     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3714 
3715     /* next, compute all the lengths */
3716     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3717     olens = dlens + m;
3718     for (i=0; i<m; i++) {
3719       jend = ii[i+1] - ii[i];
3720       olen = 0;
3721       dlen = 0;
3722       for (j=0; j<jend; j++) {
3723         if (*jj < rstart || *jj >= rend) olen++;
3724         else dlen++;
3725         jj++;
3726       }
3727       olens[i] = olen;
3728       dlens[i] = dlen;
3729     }
3730     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3731     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3732     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3733     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3734     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3735     ierr = PetscFree(dlens);CHKERRQ(ierr);
3736   } else {
3737     PetscInt ml,nl;
3738 
3739     M    = *newmat;
3740     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3741     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3742     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3743     /*
3744          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3745        rather than the slower MatSetValues().
3746     */
3747     M->was_assembled = PETSC_TRUE;
3748     M->assembled     = PETSC_FALSE;
3749   }
3750   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3751   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3752   ii   = aij->i;
3753   jj   = aij->j;
3754   aa   = aij->a;
3755   for (i=0; i<m; i++) {
3756     row   = rstart + i;
3757     nz    = ii[i+1] - ii[i];
3758     cwork = jj;     jj += nz;
3759     vwork = aa;     aa += nz;
3760     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3761   }
3762 
3763   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3764   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3765   *newmat = M;
3766 
3767   /* save submatrix used in processor for next request */
3768   if (call ==  MAT_INITIAL_MATRIX) {
3769     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3770     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3771   }
3772   PetscFunctionReturn(0);
3773 }
3774 
3775 #undef __FUNCT__
3776 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3777 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3778 {
3779   PetscInt       m,cstart, cend,j,nnz,i,d;
3780   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3781   const PetscInt *JJ;
3782   PetscScalar    *values;
3783   PetscErrorCode ierr;
3784 
3785   PetscFunctionBegin;
3786   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3787 
3788   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3789   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3790   m      = B->rmap->n;
3791   cstart = B->cmap->rstart;
3792   cend   = B->cmap->rend;
3793   rstart = B->rmap->rstart;
3794 
3795   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3796 
3797 #if defined(PETSC_USE_DEBUG)
3798   for (i=0; i<m; i++) {
3799     nnz = Ii[i+1]- Ii[i];
3800     JJ  = J + Ii[i];
3801     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3802     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3803     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3804   }
3805 #endif
3806 
3807   for (i=0; i<m; i++) {
3808     nnz     = Ii[i+1]- Ii[i];
3809     JJ      = J + Ii[i];
3810     nnz_max = PetscMax(nnz_max,nnz);
3811     d       = 0;
3812     for (j=0; j<nnz; j++) {
3813       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3814     }
3815     d_nnz[i] = d;
3816     o_nnz[i] = nnz - d;
3817   }
3818   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3819   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3820 
3821   if (v) values = (PetscScalar*)v;
3822   else {
3823     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3824   }
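       /* when v is NULL a zeroed scratch buffer is inserted instead, so the nonzero
          pattern is still established; the (v ? Ii[i] : 0) offset below then always
          points at row i's values, or at the shared zeros */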
3825 
3826   for (i=0; i<m; i++) {
3827     ii   = i + rstart;
3828     nnz  = Ii[i+1]- Ii[i];
3829     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3830   }
3831   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3832   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3833 
3834   if (!v) {
3835     ierr = PetscFree(values);CHKERRQ(ierr);
3836   }
3837   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3838   PetscFunctionReturn(0);
3839 }
3840 
3841 #undef __FUNCT__
3842 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3843 /*@
3844    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3845    (the default parallel PETSc format).
3846 
3847    Collective on MPI_Comm
3848 
3849    Input Parameters:
3850 +  B - the matrix
3851 .  i - the indices into j for the start of each local row (starts with zero)
3852 .  j - the column indices for each local row (starts with zero)
3853 -  v - optional values in the matrix
3854 
3855    Level: developer
3856 
3857    Notes:
3858        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3859      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3860      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3861 
3862        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3863 
3864        The format used for the sparse matrix input is equivalent to a
3865     row-major ordering, i.e., for the following matrix the expected input data is
3866     as shown:
3867 
3868         1 0 0
3869         2 0 3     P0
3870        -------
3871         4 5 6     P1
3872 
3873      Process0 [P0]: rows_owned=[0,1]
3874         i =  {0,1,3}  [size = nrow+1  = 2+1]
3875         j =  {0,0,2}  [size = nz = 3]
3876         v =  {1,2,3}  [size = nz = 3]
3877 
3878      Process1 [P1]: rows_owned=[2]
3879         i =  {0,3}    [size = nrow+1  = 1+1]
3880         j =  {0,1,2}  [size = nz = 3]
3881         v =  {4,5,6}  [size = nz = 3]
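
     A minimal call-sequence sketch (assuming the local arrays i, j, and v
     have been filled as in the example above; all names are illustrative):

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve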
3882 
3883 .keywords: matrix, aij, compressed row, sparse, parallel
3884 
3885 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3886           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3887 @*/
3888 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3889 {
3890   PetscErrorCode ierr;
3891 
3892   PetscFunctionBegin;
3893   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3894   PetscFunctionReturn(0);
3895 }
3896 
3897 #undef __FUNCT__
3898 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3899 /*@C
3900    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3901    (the default parallel PETSc format).  For good matrix assembly performance
3902    the user should preallocate the matrix storage by setting the parameters
3903    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3904    performance can be increased by more than a factor of 50.
3905 
3906    Collective on MPI_Comm
3907 
3908    Input Parameters:
3909 +  B - the matrix
3910 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3911            (same value is used for all local rows)
3912 .  d_nnz - array containing the number of nonzeros in the various rows of the
3913            DIAGONAL portion of the local submatrix (possibly different for each row)
3914            or NULL, if d_nz is used to specify the nonzero structure.
3915            The size of this array is equal to the number of local rows, i.e., 'm'.
3916            For matrices that will be factored, you must leave room for (and set)
3917            the diagonal entry even if it is zero.
3918 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3919            submatrix (same value is used for all local rows).
3920 -  o_nnz - array containing the number of nonzeros in the various rows of the
3921            OFF-DIAGONAL portion of the local submatrix (possibly different for
3922            each row) or NULL, if o_nz is used to specify the nonzero
3923            structure. The size of this array is equal to the number
3924            of local rows, i.e., 'm'.
3925 
3926    If the *_nnz parameter is given then the *_nz parameter is ignored.
3927 
3928    The AIJ format (also called the Yale sparse matrix format or
3929    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3930    storage.  The stored row and column indices begin with zero.
3931    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3932 
3933    The parallel matrix is partitioned such that the first m0 rows belong to
3934    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3935    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3936 
3937    The DIAGONAL portion of the local submatrix of a processor can be defined
3938    as the submatrix which is obtained by extracting the part corresponding to
3939    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3940    first row that belongs to the processor, r2 is the last row belonging to
3941    this processor, and c1-c2 is the range of indices of the local part of a
3942    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3943    common case of a square matrix, the row and column ranges are the same and
3944    the DIAGONAL part is also square. The remaining portion of the local
3945    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3946 
3947    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3948 
3949    You can call MatGetInfo() to get information on how effective the preallocation was;
3950    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3951    You can also run with the option -info and look for messages with the string
3952    malloc in them to see if additional memory allocation was needed.
3953 
3954    Example usage:
3955 
3956    Consider the following 8x8 matrix with 34 non-zero values, that is
3957    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3958    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3959    as follows:
3960 
3961 .vb
3962             1  2  0  |  0  3  0  |  0  4
3963     Proc0   0  5  6  |  7  0  0  |  8  0
3964             9  0 10  | 11  0  0  | 12  0
3965     -------------------------------------
3966            13  0 14  | 15 16 17  |  0  0
3967     Proc1   0 18  0  | 19 20 21  |  0  0
3968             0  0  0  | 22 23  0  | 24  0
3969     -------------------------------------
3970     Proc2  25 26 27  |  0  0 28  | 29  0
3971            30  0  0  | 31 32 33  |  0 34
3972 .ve
3973 
3974    This can be represented as a collection of submatrices as:
3975 
3976 .vb
3977       A B C
3978       D E F
3979       G H I
3980 .ve
3981 
3982    Where the submatrices A,B,C are owned by proc0, D,E,F are
3983    owned by proc1, G,H,I are owned by proc2.
3984 
3985    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3986    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3987    The 'M','N' parameters are 8,8, and have the same values on all procs.
3988 
3989    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3990    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3991    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3992    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3993    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3994    matrix, and [DF] as another SeqAIJ matrix.
3995 
3996    When d_nz, o_nz parameters are specified, d_nz storage elements are
3997    allocated for every row of the local diagonal submatrix, and o_nz
3998    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3999    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
4000    per local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4001    In this case, the values of d_nz,o_nz are:
4002 .vb
4003      proc0 : d_nz = 2, o_nz = 2
4004      proc1 : d_nz = 3, o_nz = 2
4005      proc2 : d_nz = 1, o_nz = 4
4006 .ve
4007    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4008    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4009    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4010    34 values.
4011 
4012    When d_nnz, o_nnz parameters are specified, the storage is specified
4013    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4014    In the above case the values for d_nnz,o_nnz are:
4015 .vb
4016      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4017      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4018      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4019 .ve
4020    Here the space allocated is the sum of all the above values, i.e., 34, and
4021    hence the preallocation is perfect.
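
   As a sketch, proc1 in the example above could preallocate as follows
   (per-row values taken from the table; B is the Mat being preallocated):

.vb
     PetscInt d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve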
4022 
4023    Level: intermediate
4024 
4025 .keywords: matrix, aij, compressed row, sparse, parallel
4026 
4027 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4028           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4029 @*/
4030 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4031 {
4032   PetscErrorCode ierr;
4033 
4034   PetscFunctionBegin;
4035   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4036   PetscValidType(B,1);
4037   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4038   PetscFunctionReturn(0);
4039 }
4040 
4041 #undef __FUNCT__
4042 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4043 /*@
4044      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4045          CSR format the local rows.
4046 
4047    Collective on MPI_Comm
4048 
4049    Input Parameters:
4050 +  comm - MPI communicator
4051 .  m - number of local rows (Cannot be PETSC_DECIDE)
4052 .  n - This value should be the same as the local size used in creating the
4053        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4054        calculated if N is given). For square matrices n is almost always m.
4055 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4056 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4057 .   i - row indices
4058 .   j - column indices
4059 -   a - matrix values
4060 
4061    Output Parameter:
4062 .   mat - the matrix
4063 
4064    Level: intermediate
4065 
4066    Notes:
4067        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4068      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4069      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4070 
4071        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
4072 
4073        The format used for the sparse matrix input is equivalent to a
4074     row-major ordering, i.e., for the following matrix, the expected input data is
4075     as shown:
4076 
4077         1 0 0
4078         2 0 3     P0
4079        -------
4080         4 5 6     P1
4081 
4082      Process0 [P0]: rows_owned=[0,1]
4083         i =  {0,1,3}  [size = nrow+1  = 2+1]
4084         j =  {0,0,2}  [size = nz = 3]
4085         v =  {1,2,3}  [size = nz = 3]
4086 
4087      Process1 [P1]: rows_owned=[2]
4088         i =  {0,3}    [size = nrow+1  = 1+1]
4089         j =  {0,1,2}  [size = nz = 3]
4090         v =  {4,5,6}  [size = nz = 3]
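
     A minimal sketch for P0 above (P1 makes the matching collective call with
     its own local arrays; since the arrays are copied, they may be freed
     afterwards):

.vb
     PetscInt    i[3] = {0,1,3},j[3] = {0,0,2};
     PetscScalar v[3] = {1,2,3};
     Mat         A;
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve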
4091 
4092 .keywords: matrix, aij, compressed row, sparse, parallel
4093 
4094 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4095           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4096 @*/
4097 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4098 {
4099   PetscErrorCode ierr;
4100 
4101   PetscFunctionBegin;
4102   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4103   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4104   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4105   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4106   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4107   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4108   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4109   PetscFunctionReturn(0);
4110 }
4111 
4112 #undef __FUNCT__
4113 #define __FUNCT__ "MatCreateAIJ"
4114 /*@C
4115    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4116    (the default parallel PETSc format).  For good matrix assembly performance
4117    the user should preallocate the matrix storage by setting the parameters
4118    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4119    performance can be increased by more than a factor of 50.
4120 
4121    Collective on MPI_Comm
4122 
4123    Input Parameters:
4124 +  comm - MPI communicator
4125 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4126            This value should be the same as the local size used in creating the
4127            y vector for the matrix-vector product y = Ax.
4128 .  n - This value should be the same as the local size used in creating the
4129        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4130        calculated if N is given). For square matrices n is almost always m.
4131 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4132 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4133 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4134            (same value is used for all local rows)
4135 .  d_nnz - array containing the number of nonzeros in the various rows of the
4136            DIAGONAL portion of the local submatrix (possibly different for each row)
4137            or NULL, if d_nz is used to specify the nonzero structure.
4138            The size of this array is equal to the number of local rows, i.e 'm'.
4139 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4140            submatrix (same value is used for all local rows).
4141 -  o_nnz - array containing the number of nonzeros in the various rows of the
4142            OFF-DIAGONAL portion of the local submatrix (possibly different for
4143            each row) or NULL, if o_nz is used to specify the nonzero
4144            structure. The size of this array is equal to the number
4145            of local rows, i.e 'm'.
4146 
4147    Output Parameter:
4148 .  A - the matrix
4149 
4150    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4151    MatXXXXSetPreallocation() paradigm instead of calling this routine directly.
4152    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4153 
4154    Notes:
4155    If the *_nnz parameter is given then the *_nz parameter is ignored.
4156 
4157    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4158    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4159    storage requirements for this matrix.
4160 
4161    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4162    processor then it must be used on all processors that share the object for
4163    that argument.
4164 
4165    The user MUST specify either the local or global matrix dimensions
4166    (possibly both).
4167 
4168    The parallel matrix is partitioned across processors such that the
4169    first m0 rows belong to process 0, the next m1 rows belong to
4170    process 1, the next m2 rows belong to process 2, etc., where
4171    m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
4172    values corresponding to an [m x N] submatrix.
4173 
4174    The columns are logically partitioned with the n0 columns belonging
4175    to 0th partition, the next n1 columns belonging to the next
4176    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
4177 
4178    The DIAGONAL portion of the local submatrix on any given processor
4179    is the submatrix formed by the m rows and the n columns owned by
4180    that processor, i.e., the diagonal submatrix on process 0 is
4181    [m0 x n0], on process 1 it is [m1 x n1],
4182    etc. The remaining portion of the local submatrix [m x (N-n)]
4183    constitutes the OFF-DIAGONAL portion. The example below
4184    illustrates this concept.
4185 
4186    For a square global matrix we define each processor's diagonal portion
4187    to be its local rows and the corresponding columns (a square submatrix);
4188    each processor's off-diagonal portion encompasses the remainder of the
4189    local matrix (a rectangular submatrix).
4190 
4191    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4192 
4193    When calling this routine with a single process communicator, a matrix of
4194    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4195    type of communicator, use the construction mechanism:
4196      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4197 
4198    By default, this format uses inodes (identical nodes) when possible.
4199    We search for consecutive rows with the same nonzero structure, thereby
4200    reusing matrix information to achieve increased efficiency.
4201 
4202    Options Database Keys:
4203 +  -mat_no_inode  - Do not use inodes
4204 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4205 -  -mat_aij_oneindex - Internally use indexing starting at 1
4206         rather than 0.  Note that when calling MatSetValues(),
4207         the user still MUST index entries starting at 0!
4208 
4209 
4210    Example usage:
4211 
4212    Consider the following 8x8 matrix with 34 non-zero values, that is
4213    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4214    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4215    as follows:
4216 
4217 .vb
4218             1  2  0  |  0  3  0  |  0  4
4219     Proc0   0  5  6  |  7  0  0  |  8  0
4220             9  0 10  | 11  0  0  | 12  0
4221     -------------------------------------
4222            13  0 14  | 15 16 17  |  0  0
4223     Proc1   0 18  0  | 19 20 21  |  0  0
4224             0  0  0  | 22 23  0  | 24  0
4225     -------------------------------------
4226     Proc2  25 26 27  |  0  0 28  | 29  0
4227            30  0  0  | 31 32 33  |  0 34
4228 .ve
4229 
4230    This can be represented as a collection of submatrices as:
4231 
4232 .vb
4233       A B C
4234       D E F
4235       G H I
4236 .ve
4237 
4238    Where the submatrices A,B,C are owned by proc0, D,E,F are
4239    owned by proc1, G,H,I are owned by proc2.
4240 
4241    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4242    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4243    The 'M','N' parameters are 8,8, and have the same values on all procs.
4244 
4245    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4246    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4247    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4248    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4249    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4250    matrix, and [DF] as another SeqAIJ matrix.
4251 
4252    When d_nz, o_nz parameters are specified, d_nz storage elements are
4253    allocated for every row of the local diagonal submatrix, and o_nz
4254    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4255    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
4256    per local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4257    In this case, the values of d_nz,o_nz are:
4258 .vb
4259      proc0 : d_nz = 2, o_nz = 2
4260      proc1 : d_nz = 3, o_nz = 2
4261      proc2 : d_nz = 1, o_nz = 4
4262 .ve
4263    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4264    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4265    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4266    34 values.
4267 
4268    When d_nnz, o_nnz parameters are specified, the storage is specified
4269    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4270    In the above case the values for d_nnz,o_nnz are:
4271 .vb
4272      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4273      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4274      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4275 .ve
4276    Here the space allocated is the sum of all the above values, i.e., 34, and
4277    hence the preallocation is perfect.
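
   As a sketch, the matrix above could be created on proc1 with the call
   below (per-row values from the table; each process passes its own local
   sizes and nnz arrays in this collective call):

.vb
     PetscInt d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
     Mat      A;
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve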
4278 
4279    Level: intermediate
4280 
4281 .keywords: matrix, aij, compressed row, sparse, parallel
4282 
4283 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4284           MPIAIJ, MatCreateMPIAIJWithArrays()
4285 @*/
4286 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4287 {
4288   PetscErrorCode ierr;
4289   PetscMPIInt    size;
4290 
4291   PetscFunctionBegin;
4292   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4293   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4294   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4295   if (size > 1) {
4296     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4297     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4298   } else {
4299     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4300     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4301   }
4302   PetscFunctionReturn(0);
4303 }
4304 
4305 #undef __FUNCT__
4306 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4307 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4308 {
4309   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4310 
4311   PetscFunctionBegin;
4312   *Ad     = a->A;
4313   *Ao     = a->B;
4314   *colmap = a->garray;
4315   PetscFunctionReturn(0);
4316 }
4317 
4318 #undef __FUNCT__
4319 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4320 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4321 {
4322   PetscErrorCode ierr;
4323   PetscInt       i;
4324   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4325 
4326   PetscFunctionBegin;
4327   if (coloring->ctype == IS_COLORING_GLOBAL) {
4328     ISColoringValue *allcolors,*colors;
4329     ISColoring      ocoloring;
4330 
4331     /* set coloring for diagonal portion */
4332     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4333 
4334     /* set coloring for off-diagonal portion */
4335     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4336     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4337     for (i=0; i<a->B->cmap->n; i++) {
4338       colors[i] = allcolors[a->garray[i]];
4339     }
4340     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4341     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4342     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4343     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4344   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4345     ISColoringValue *colors;
4346     PetscInt        *larray;
4347     ISColoring      ocoloring;
4348 
4349     /* set coloring for diagonal portion */
4350     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4351     for (i=0; i<a->A->cmap->n; i++) {
4352       larray[i] = i + A->cmap->rstart;
4353     }
4354     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4355     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4356     for (i=0; i<a->A->cmap->n; i++) {
4357       colors[i] = coloring->colors[larray[i]];
4358     }
4359     ierr = PetscFree(larray);CHKERRQ(ierr);
4360     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4361     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4362     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4363 
4364     /* set coloring for off-diagonal portion */
4365     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4366     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4367     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4368     for (i=0; i<a->B->cmap->n; i++) {
4369       colors[i] = coloring->colors[larray[i]];
4370     }
4371     ierr = PetscFree(larray);CHKERRQ(ierr);
4372     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4373     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4374     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4375   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4376   PetscFunctionReturn(0);
4377 }
4378 
4379 #undef __FUNCT__
4380 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4381 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4382 {
4383   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4384   PetscErrorCode ierr;
4385 
4386   PetscFunctionBegin;
4387   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4388   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4389   PetscFunctionReturn(0);
4390 }
4391 
4392 #undef __FUNCT__
4393 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4394 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4395 {
4396   PetscErrorCode ierr;
4397   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4398   PetscInt       *indx;
4399 
4400   PetscFunctionBegin;
4401   /* This routine will ONLY return MPIAIJ type matrix */
4402   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4403   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4404   if (n == PETSC_DECIDE) {
4405     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4406   }
4407   /* Check sum(n) = N */
4408   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4409   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
4410 
4411   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4412   rstart -= m;
4413 
4414   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4415   for (i=0; i<m; i++) {
4416     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4417     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4418     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4419   }
4420 
4421   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4422   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4423   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4424   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4425   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4426   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4427   PetscFunctionReturn(0);
4428 }
4429 
4430 #undef __FUNCT__
4431 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4432 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4433 {
4434   PetscErrorCode ierr;
4435   PetscInt       m,N,i,rstart,nnz,Ii;
4436   PetscInt       *indx;
4437   PetscScalar    *values;
4438 
4439   PetscFunctionBegin;
4440   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4441   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4442   for (i=0; i<m; i++) {
4443     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4444     Ii   = i + rstart;
4445     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4446     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4447   }
4448   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4449   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4450   PetscFunctionReturn(0);
4451 }
4452 
4453 #undef __FUNCT__
4454 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4455 /*@
4456       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4457                  matrices from each processor
4458 
4459     Collective on MPI_Comm
4460 
4461    Input Parameters:
4462 +    comm - the communicator the parallel matrix will live on
4463 .    inmat - the input sequential matrix
4464 .    n - number of local columns (or PETSC_DECIDE)
4465 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4466 
4467    Output Parameter:
4468 .    outmat - the parallel matrix generated
4469 
4470     Level: advanced
4471 
4472    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4473 
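   A minimal sketch (assuming each rank holds a SeqAIJ matrix "seq" with the
   same number of columns; names are illustrative only):

.vb
     Mat par;
     MatCreateMPIAIJConcatenateSeqAIJ(comm,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);
.ve
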
4474 @*/
4475 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4476 {
4477   PetscErrorCode ierr;
4478   PetscMPIInt    size;
4479 
4480   PetscFunctionBegin;
4481   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4482   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4483   if (size == 1) {
4484     if (scall == MAT_INITIAL_MATRIX) {
4485       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4486     } else {
4487       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4488     }
4489   } else {
4490     if (scall == MAT_INITIAL_MATRIX) {
4491       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4492     }
4493     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4494   }
4495   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4496   PetscFunctionReturn(0);
4497 }
4498 
4499 #undef __FUNCT__
4500 #define __FUNCT__ "MatFileSplit"
4501 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4502 {
4503   PetscErrorCode    ierr;
4504   PetscMPIInt       rank;
4505   PetscInt          m,N,i,rstart,nnz;
4506   size_t            len;
4507   const PetscInt    *indx;
4508   PetscViewer       out;
4509   char              *name;
4510   Mat               B;
4511   const PetscScalar *values;
4512 
4513   PetscFunctionBegin;
4514   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4515   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4516   /* Should this be the type of the diagonal block of A? */
4517   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4518   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4519   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4520   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4521   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4522   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4523   for (i=0; i<m; i++) {
4524     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4525     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4526     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4527   }
4528   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4529   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4530 
4531   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4532   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4533   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4534   sprintf(name,"%s.%d",outfile,rank);
4535   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4536   ierr = PetscFree(name);CHKERRQ(ierr);
4537   ierr = MatView(B,out);CHKERRQ(ierr);
4538   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4539   ierr = MatDestroy(&B);CHKERRQ(ierr);
4540   PetscFunctionReturn(0);
4541 }
4542 
4543 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4544 #undef __FUNCT__
4545 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4546 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4547 {
4548   PetscErrorCode      ierr;
4549   Mat_Merge_SeqsToMPI *merge;
4550   PetscContainer      container;
4551 
4552   PetscFunctionBegin;
4553   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4554   if (container) {
4555     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4556     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4557     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4558     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4559     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4560     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4561     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4562     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4563     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4564     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4565     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4566     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4567     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4568     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4569     ierr = PetscFree(merge);CHKERRQ(ierr);
4570     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4571   }
4572   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4573   PetscFunctionReturn(0);
4574 }
4575 
4576 #include <../src/mat/utils/freespace.h>
4577 #include <petscbt.h>
4578 
4579 #undef __FUNCT__
4580 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4581 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4582 {
4583   PetscErrorCode      ierr;
4584   MPI_Comm            comm;
4585   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4586   PetscMPIInt         size,rank,taga,*len_s;
4587   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4588   PetscInt            proc,m;
4589   PetscInt            **buf_ri,**buf_rj;
4590   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4591   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4592   MPI_Request         *s_waits,*r_waits;
4593   MPI_Status          *status;
4594   MatScalar           *aa=a->a;
4595   MatScalar           **abuf_r,*ba_i;
4596   Mat_Merge_SeqsToMPI *merge;
4597   PetscContainer      container;
4598 
4599   PetscFunctionBegin;
4600   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4601   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4602 
4603   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4604   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4605 
4606   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4607   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4608 
4609   bi     = merge->bi;
4610   bj     = merge->bj;
4611   buf_ri = merge->buf_ri;
4612   buf_rj = merge->buf_rj;
4613 
4614   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4615   owners = merge->rowmap->range;
4616   len_s  = merge->len_s;
4617 
4618   /* send and recv matrix values */
4619   /*-----------------------------*/
4620   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4621   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4622 
4623   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4624   for (proc=0,k=0; proc<size; proc++) {
4625     if (!len_s[proc]) continue;
4626     i    = owners[proc];
4627     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4628     k++;
4629   }
4630 
4631   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4632   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4633   ierr = PetscFree(status);CHKERRQ(ierr);
4634 
4635   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4636   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4637 
4638   /* insert mat values of mpimat */
4639   /*----------------------------*/
4640   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4641   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4642 
4643   for (k=0; k<merge->nrecv; k++) {
4644     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4645     nrows       = *(buf_ri_k[k]);
4646     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4647     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4648   }
4649 
4650   /* set values of ba */
4651   m = merge->rowmap->n;
4652   for (i=0; i<m; i++) {
4653     arow = owners[rank] + i;
4654     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4655     bnzi = bi[i+1] - bi[i];
4656     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4657 
4658     /* add local non-zero vals of this proc's seqmat into ba */
4659     anzi   = ai[arow+1] - ai[arow];
4660     aj     = a->j + ai[arow];
4661     aa     = a->a + ai[arow];
4662     nextaj = 0;
4663     for (j=0; nextaj<anzi; j++) {
4664       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4665         ba_i[j] += aa[nextaj++];
4666       }
4667     }
4668 
4669     /* add received vals into ba */
4670     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4671       /* i-th row */
4672       if (i == *nextrow[k]) {
4673         anzi   = *(nextai[k]+1) - *nextai[k];
4674         aj     = buf_rj[k] + *(nextai[k]);
4675         aa     = abuf_r[k] + *(nextai[k]);
4676         nextaj = 0;
4677         for (j=0; nextaj<anzi; j++) {
4678           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4679             ba_i[j] += aa[nextaj++];
4680           }
4681         }
4682         nextrow[k]++; nextai[k]++;
4683       }
4684     }
4685     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4686   }
4687   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4688   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4689 
4690   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4691   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4692   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4693   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4694   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4695   PetscFunctionReturn(0);
4696 }
4697 
4698 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4699 
4700 #undef __FUNCT__
4701 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4702 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4703 {
4704   PetscErrorCode      ierr;
4705   Mat                 B_mpi;
4706   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4707   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4708   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4709   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4710   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4711   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4712   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4713   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4714   MPI_Status          *status;
4715   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4716   PetscBT             lnkbt;
4717   Mat_Merge_SeqsToMPI *merge;
4718   PetscContainer      container;
4719 
4720   PetscFunctionBegin;
4721   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4722 
4723   /* make sure it is a PETSc comm */
4724   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4725   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4726   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4727 
4728   ierr = PetscNew(&merge);CHKERRQ(ierr);
4729   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4730 
4731   /* determine row ownership */
4732   /*---------------------------------------------------------*/
4733   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4734   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4735   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4736   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4737   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4738   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4739   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4740 
4741   m      = merge->rowmap->n;
4742   owners = merge->rowmap->range;
4743 
4744   /* determine the number of messages to send, their lengths */
4745   /*---------------------------------------------------------*/
4746   len_s = merge->len_s;
4747 
4748   len          = 0; /* length of buf_si[] */
4749   merge->nsend = 0;
4750   for (proc=0; proc<size; proc++) {
4751     len_si[proc] = 0;
4752     if (proc == rank) {
4753       len_s[proc] = 0;
4754     } else {
4755       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4756       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4757     }
4758     if (len_s[proc]) {
4759       merge->nsend++;
4760       nrows = 0;
4761       for (i=owners[proc]; i<owners[proc+1]; i++) {
4762         if (ai[i+1] > ai[i]) nrows++;
4763       }
4764       len_si[proc] = 2*(nrows+1);
4765       len         += len_si[proc];
4766     }
4767   }
4768 
4769   /* determine the number and length of messages to receive for ij-structure */
4770   /*-------------------------------------------------------------------------*/
4771   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4772   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4773 
4774   /* post the Irecv of j-structure */
4775   /*-------------------------------*/
4776   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4777   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4778 
4779   /* post the Isend of j-structure */
4780   /*--------------------------------*/
4781   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4782 
4783   for (proc=0, k=0; proc<size; proc++) {
4784     if (!len_s[proc]) continue;
4785     i    = owners[proc];
4786     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4787     k++;
4788   }
4789 
4790   /* receives and sends of j-structure are complete */
4791   /*------------------------------------------------*/
4792   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4793   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4794 
4795   /* send and recv i-structure */
4796   /*---------------------------*/
4797   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4798   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4799 
4800   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4801   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4802   for (proc=0,k=0; proc<size; proc++) {
4803     if (!len_s[proc]) continue;
4804     /* form outgoing message for i-structure:
4805          buf_si[0]:                 nrows to be sent
4806                [1:nrows]:           row index (global)
4807                [nrows+1:2*nrows+1]: i-structure index
4808     */
4809     /*-------------------------------------------*/
4810     nrows       = len_si[proc]/2 - 1;
4811     buf_si_i    = buf_si + nrows+1;
4812     buf_si[0]   = nrows;
4813     buf_si_i[0] = 0;
4814     nrows       = 0;
4815     for (i=owners[proc]; i<owners[proc+1]; i++) {
4816       anzi = ai[i+1] - ai[i];
4817       if (anzi) {
4818         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4819         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4820         nrows++;
4821       }
4822     }
4823     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4824     k++;
4825     buf_si += len_si[proc];
4826   }
4827 
4828   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4829   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4830 
4831   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4832   for (i=0; i<merge->nrecv; i++) {
4833     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4834   }
4835 
4836   ierr = PetscFree(len_si);CHKERRQ(ierr);
4837   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4838   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4839   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4840   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4841   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4842   ierr = PetscFree(status);CHKERRQ(ierr);
4843 
4844   /* compute a local seq matrix in each processor */
4845   /*----------------------------------------------*/
4846   /* allocate bi array and free space for accumulating nonzero column info */
4847   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4848   bi[0] = 0;
4849 
4850   /* create and initialize a linked list */
4851   nlnk = N+1;
4852   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4853 
4854   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4855   len  = ai[owners[rank+1]] - ai[owners[rank]];
4856   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4857 
4858   current_space = free_space;
4859 
4860   /* determine symbolic info for each local row */
4861   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4862 
4863   for (k=0; k<merge->nrecv; k++) {
4864     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4865     nrows       = *buf_ri_k[k];
4866     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4867     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4868   }
4869 
4870   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4871   len  = 0;
4872   for (i=0; i<m; i++) {
4873     bnzi = 0;
4874     /* add local non-zero cols of this proc's seqmat into lnk */
4875     arow  = owners[rank] + i;
4876     anzi  = ai[arow+1] - ai[arow];
4877     aj    = a->j + ai[arow];
4878     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4879     bnzi += nlnk;
4880     /* add received col data into lnk */
4881     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4882       if (i == *nextrow[k]) { /* i-th row */
4883         anzi  = *(nextai[k]+1) - *nextai[k];
4884         aj    = buf_rj[k] + *nextai[k];
4885         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4886         bnzi += nlnk;
4887         nextrow[k]++; nextai[k]++;
4888       }
4889     }
4890     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4891 
4892     /* if free space is not available, make more free space */
4893     if (current_space->local_remaining<bnzi) {
4894       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4895       nspacedouble++;
4896     }
4897     /* copy data into free space, then initialize lnk */
4898     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4899     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4900 
4901     current_space->array           += bnzi;
4902     current_space->local_used      += bnzi;
4903     current_space->local_remaining -= bnzi;
4904 
4905     bi[i+1] = bi[i] + bnzi;
4906   }
4907 
4908   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4909 
4910   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4911   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4912   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4913 
4914   /* create symbolic parallel matrix B_mpi */
4915   /*---------------------------------------*/
4916   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4917   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4918   if (n==PETSC_DECIDE) {
4919     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4920   } else {
4921     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4922   }
4923   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4924   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4925   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4926   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4927   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4928 
4929   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4930   B_mpi->assembled    = PETSC_FALSE;
4931   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4932   merge->bi           = bi;
4933   merge->bj           = bj;
4934   merge->buf_ri       = buf_ri;
4935   merge->buf_rj       = buf_rj;
4936   merge->coi          = NULL;
4937   merge->coj          = NULL;
4938   merge->owners_co    = NULL;
4939 
4940   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4941 
4942   /* attach the supporting struct to B_mpi for reuse */
4943   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4944   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4945   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4946   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4947   *mpimat = B_mpi;
4948 
4949   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4950   PetscFunctionReturn(0);
4951 }
4952 
4953 #undef __FUNCT__
4954 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4955 /*@C
4956       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4957                  matrices from each processor
4958 
4959     Collective on MPI_Comm
4960 
4961    Input Parameters:
4962 +    comm - the communicator the parallel matrix will live on
4963 .    seqmat - the input sequential matrix
4964 .    m - number of local rows (or PETSC_DECIDE)
4965 .    n - number of local columns (or PETSC_DECIDE)
4966 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4967 
4968    Output Parameter:
4969 .    mpimat - the parallel matrix generated
4970 
4971     Level: advanced
4972 
4973    Notes:
4974      The dimensions of the sequential matrix in each processor MUST be the same.
4975      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4976      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
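
   A minimal sketch (assuming every rank holds an identically sized SeqAIJ
   matrix "seq" whose entries are to be summed into one MPIAIJ matrix; names
   are illustrative only):

.vb
     Mat sum;
     MatCreateMPIAIJSumSeqAIJ(comm,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sum);
.ve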
4977 @*/
4978 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4979 {
4980   PetscErrorCode ierr;
4981   PetscMPIInt    size;
4982 
4983   PetscFunctionBegin;
4984   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4985   if (size == 1) {
4986     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4987     if (scall == MAT_INITIAL_MATRIX) {
4988       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4989     } else {
4990       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4991     }
4992     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4993     PetscFunctionReturn(0);
4994   }
4995   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4996   if (scall == MAT_INITIAL_MATRIX) {
4997     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4998   }
4999   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5000   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5001   PetscFunctionReturn(0);
5002 }
5003 
5004 #undef __FUNCT__
5005 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5006 /*@
5007      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5008           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5009           with MatGetSize().
5010 
5011     Not Collective
5012 
5013    Input Parameters:
5014 +    A - the matrix
5015 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5016 
5017    Output Parameter:
5018 .    A_loc - the local sequential matrix generated
5019 
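   A minimal usage sketch (A is an assembled MPIAIJ matrix; the REUSE call
   refills the matrix created by the INITIAL call after A's values change):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... modify values of A, keeping the same nonzero pattern ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
.ve
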
5020     Level: developer
5021 
5022 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5023 
5024 @*/
5025 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5026 {
5027   PetscErrorCode ierr;
5028   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5029   Mat_SeqAIJ     *mat,*a,*b;
5030   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5031   MatScalar      *aa,*ba,*cam;
5032   PetscScalar    *ca;
5033   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5034   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5035   PetscBool      match;
5036 
5037   PetscFunctionBegin;
5038   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5039   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5040   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5041   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5042   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5043   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5044   aa = a->a; ba = b->a;
5045   if (scall == MAT_INITIAL_MATRIX) {
5046     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
5047     ci[0] = 0;
5048     for (i=0; i<am; i++) {
5049       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5050     }
5051     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
5052     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
5053     k    = 0;
5054     for (i=0; i<am; i++) {
5055       ncols_o = bi[i+1] - bi[i];
5056       ncols_d = ai[i+1] - ai[i];
5057       /* off-diagonal portion of A */
5058       for (jo=0; jo<ncols_o; jo++) {
5059         col = cmap[*bj];
5060         if (col >= cstart) break;
5061         cj[k]   = col; bj++;
5062         ca[k++] = *ba++;
5063       }
5064       /* diagonal portion of A */
5065       for (j=0; j<ncols_d; j++) {
5066         cj[k]   = cstart + *aj++;
5067         ca[k++] = *aa++;
5068       }
5069       /* off-diagonal portion of A */
5070       for (j=jo; j<ncols_o; j++) {
5071         cj[k]   = cmap[*bj++];
5072         ca[k++] = *ba++;
5073       }
5074     }
5075     /* put together the new matrix */
5076     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5077     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5078     /* Since these are PETSc arrays, change flags to free them as necessary. */
5079     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5080     mat->free_a  = PETSC_TRUE;
5081     mat->free_ij = PETSC_TRUE;
5082     mat->nonew   = 0;
5083   } else if (scall == MAT_REUSE_MATRIX) {
5084     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5085     ci = mat->i; cj = mat->j; cam = mat->a;
5086     for (i=0; i<am; i++) {
5087       /* off-diagonal portion of A */
5088       ncols_o = bi[i+1] - bi[i];
5089       for (jo=0; jo<ncols_o; jo++) {
5090         col = cmap[*bj];
5091         if (col >= cstart) break;
5092         *cam++ = *ba++; bj++;
5093       }
5094       /* diagonal portion of A */
5095       ncols_d = ai[i+1] - ai[i];
5096       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5097       /* off-diagonal portion of A */
5098       for (j=jo; j<ncols_o; j++) {
5099         *cam++ = *ba++; bj++;
5100       }
5101     }
5102   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5103   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5104   PetscFunctionReturn(0);
5105 }
5106 
5107 #undef __FUNCT__
5108 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5109 /*@C
5110      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5111 
5112     Not Collective
5113 
5114    Input Parameters:
5115 +    A - the matrix
5116 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5117 -    row, col - index sets of rows and columns to extract (or NULL)
5118 
5119    Output Parameter:
5120 .    A_loc - the local sequential matrix generated
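   A minimal usage sketch (passing NULL for row and col extracts all local
   rows and all nonzero columns of the MPIAIJ matrix A):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
.ve
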
5121 
5122     Level: developer
5123 
5124 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5125 
5126 @*/
5127 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5128 {
5129   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5130   PetscErrorCode ierr;
5131   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5132   IS             isrowa,iscola;
5133   Mat            *aloc;
5134   PetscBool      match;
5135 
5136   PetscFunctionBegin;
5137   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5138   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5139   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5140   if (!row) {
5141     start = A->rmap->rstart; end = A->rmap->rend;
5142     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5143   } else {
5144     isrowa = *row;
5145   }
5146   if (!col) {
5147     start = A->cmap->rstart;
5148     cmap  = a->garray;
5149     nzA   = a->A->cmap->n;
5150     nzB   = a->B->cmap->n;
5151     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5152     ncols = 0;
5153     for (i=0; i<nzB; i++) {
5154       if (cmap[i] < start) idx[ncols++] = cmap[i];
5155       else break;
5156     }
5157     imark = i;
5158     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5159     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5160     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5161   } else {
5162     iscola = *col;
5163   }
5164   if (scall != MAT_INITIAL_MATRIX) {
5165     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5166     aloc[0] = *A_loc;
5167   }
5168   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5169   *A_loc = aloc[0];
5170   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5171   if (!row) {
5172     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5173   }
5174   if (!col) {
5175     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5176   }
5177   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5178   PetscFunctionReturn(0);
5179 }
5180 
5181 #undef __FUNCT__
5182 #define __FUNCT__ "MatGetBrowsOfAcols"
5183 /*@C
5184     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5185 
5186     Collective on Mat
5187 
5188    Input Parameters:
5189 +    A,B - the matrices in mpiaij format
5190 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5191 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5192 
5193    Output Parameters:
5194 +    rowb, colb - index sets of rows and columns of B that were extracted (created here when NULL is passed as input)
5195 -    B_seq - the sequential matrix generated
5196 
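   Example usage (a minimal sketch, assuming A and B are assembled MPIAIJ matrices with compatible layouts; error checking omitted):
.vb
    IS  rowb,colb;
    Mat B_seq;
    MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
    /* ... the returned rowb/colb must be passed back unchanged for reuse ... */
    MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
    ISDestroy(&rowb);
    ISDestroy(&colb);
    MatDestroy(&B_seq);
.ve
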
5197     Level: developer
5198 
5199 @*/
5200 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5201 {
5202   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5203   PetscErrorCode ierr;
5204   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5205   IS             isrowb,iscolb;
5206   Mat            *bseq=NULL;
5207 
5208   PetscFunctionBegin;
5209   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5210     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5211   }
5212   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5213 
5214   if (scall == MAT_INITIAL_MATRIX) {
5215     start = A->cmap->rstart;
5216     cmap  = a->garray;
5217     nzA   = a->A->cmap->n;
5218     nzB   = a->B->cmap->n;
5219     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5220     ncols = 0;
5221     for (i=0; i<nzB; i++) {  /* rows of B below the local row range */
5222       if (cmap[i] < start) idx[ncols++] = cmap[i];
5223       else break;
5224     }
5225     imark = i;
5226     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5227     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows of B above the local row range */
5228     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5229     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5230   } else {
5231     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5232     isrowb  = *rowb; iscolb = *colb;
5233     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5234     bseq[0] = *B_seq;
5235   }
5236   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5237   *B_seq = bseq[0];
5238   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5239   if (!rowb) {
5240     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5241   } else {
5242     *rowb = isrowb;
5243   }
5244   if (!colb) {
5245     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5246   } else {
5247     *colb = iscolb;
5248   }
5249   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5250   PetscFunctionReturn(0);
5251 }
5252 
5253 #undef __FUNCT__
5254 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5255 /*
5256     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5257     of the OFF-DIAGONAL portion of local A
5258 
5259     Collective on Mat
5260 
5261    Input Parameters:
5262 +    A,B - the matrices in mpiaij format
5263 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5264 
5265    Output Parameter:
5266 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5267 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5268 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5269 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5270 
5271     Level: developer
5272 
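   Example call (a sketch of internal usage; the saved startsj/bufa buffers allow a cheap numeric-only update on reuse):
.vb
    PetscInt  *startsj_s = NULL,*startsj_r = NULL;
    MatScalar *bufa = NULL;
    Mat       B_oth;
    MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
    /* ... after new numerical values are set in B ... */
    MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
    PetscFree2(startsj_s,startsj_r);
    PetscFree(bufa);
    MatDestroy(&B_oth);
.ve
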
5273 */
5274 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5275 {
5276   VecScatter_MPI_General *gen_to,*gen_from;
5277   PetscErrorCode         ierr;
5278   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5279   Mat_SeqAIJ             *b_oth;
5280   VecScatter             ctx =a->Mvctx;
5281   MPI_Comm               comm;
5282   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5283   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5284   PetscScalar            *rvalues,*svalues;
5285   MatScalar              *b_otha,*bufa,*bufA;
5286   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5287   MPI_Request            *rwaits = NULL,*swaits = NULL;
5288   MPI_Status             *sstatus,rstatus;
5289   PetscMPIInt            jj;
5290   PetscInt               *cols,sbs,rbs;
5291   PetscScalar            *vals;
5292 
5293   PetscFunctionBegin;
5294   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5295   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5296     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5297   }
5298   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5299   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5300 
5301   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5302   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5303   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
5304   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
5305   nrecvs   = gen_from->n;
5306   nsends   = gen_to->n;
5307 
5308   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5309   srow    = gen_to->indices;    /* local row index to be sent */
5310   sstarts = gen_to->starts;
5311   sprocs  = gen_to->procs;
5312   sstatus = gen_to->sstatus;
5313   sbs     = gen_to->bs;
5314   rstarts = gen_from->starts;
5315   rprocs  = gen_from->procs;
5316   rbs     = gen_from->bs;
5317 
5318   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5319   if (scall == MAT_INITIAL_MATRIX) {
5320     /* i-array */
5321     /*---------*/
5322     /*  post receives */
5323     for (i=0; i<nrecvs; i++) {
5324       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5325       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5326       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5327     }
5328 
5329     /* pack the outgoing message */
5330     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5331 
5332     sstartsj[0] = 0;
5333     rstartsj[0] = 0;
5334     len         = 0; /* total length of j or a array to be sent */
5335     k           = 0;
5336     for (i=0; i<nsends; i++) {
5337       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5338       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5339       for (j=0; j<nrows; j++) {
5340         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5341         for (l=0; l<sbs; l++) {
5342           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5343 
5344           rowlen[j*sbs+l] = ncols;
5345 
5346           len += ncols;
5347           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5348         }
5349         k++;
5350       }
5351       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5352 
5353       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5354     }
5355     /* recvs and sends of i-array are completed */
5356     i = nrecvs;
5357     while (i--) {
5358       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5359     }
5360     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5361 
5362     /* allocate buffers for sending j and a arrays */
5363     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5364     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5365 
5366     /* create i-array of B_oth */
5367     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5368 
5369     b_othi[0] = 0;
5370     len       = 0; /* total length of j or a array to be received */
5371     k         = 0;
5372     for (i=0; i<nrecvs; i++) {
5373       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5374       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5375       for (j=0; j<nrows; j++) {
5376         b_othi[k+1] = b_othi[k] + rowlen[j];
5377         len        += rowlen[j]; k++;
5378       }
5379       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5380     }
5381 
5382     /* allocate space for j and a arrays of B_oth */
5383     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5384     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5385 
5386     /* j-array */
5387     /*---------*/
5388     /*  post receives of j-array */
5389     for (i=0; i<nrecvs; i++) {
5390       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5391       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5392     }
5393 
5394     /* pack the outgoing message j-array */
5395     k = 0;
5396     for (i=0; i<nsends; i++) {
5397       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5398       bufJ  = bufj+sstartsj[i];
5399       for (j=0; j<nrows; j++) {
5400         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5401         for (ll=0; ll<sbs; ll++) {
5402           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5403           for (l=0; l<ncols; l++) {
5404             *bufJ++ = cols[l];
5405           }
5406           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5407         }
5408       }
5409       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5410     }
5411 
5412     /* recvs and sends of j-array are completed */
5413     i = nrecvs;
5414     while (i--) {
5415       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5416     }
5417     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5418   } else if (scall == MAT_REUSE_MATRIX) {
5419     sstartsj = *startsj_s;
5420     rstartsj = *startsj_r;
5421     bufa     = *bufa_ptr;
5422     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5423     b_otha   = b_oth->a;
5424   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5425 
5426   /* a-array */
5427   /*---------*/
5428   /*  post receives of a-array */
5429   for (i=0; i<nrecvs; i++) {
5430     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5431     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5432   }
5433 
5434   /* pack the outgoing message a-array */
5435   k = 0;
5436   for (i=0; i<nsends; i++) {
5437     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5438     bufA  = bufa+sstartsj[i];
5439     for (j=0; j<nrows; j++) {
5440       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5441       for (ll=0; ll<sbs; ll++) {
5442         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5443         for (l=0; l<ncols; l++) {
5444           *bufA++ = vals[l];
5445         }
5446         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5447       }
5448     }
5449     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5450   }
5451   /* recvs and sends of a-array are completed */
5452   i = nrecvs;
5453   while (i--) {
5454     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5455   }
5456   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5457   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5458 
5459   if (scall == MAT_INITIAL_MATRIX) {
5460     /* put together the new matrix */
5461     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5462 
5463     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5464     /* Since these are PETSc arrays, change flags to free them as necessary. */
5465     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5466     b_oth->free_a  = PETSC_TRUE;
5467     b_oth->free_ij = PETSC_TRUE;
5468     b_oth->nonew   = 0;
5469 
5470     ierr = PetscFree(bufj);CHKERRQ(ierr);
5471     if (!startsj_s || !bufa_ptr) {
5472       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5473       ierr = PetscFree(bufa);CHKERRQ(ierr);
5474     } else {
5475       *startsj_s = sstartsj;
5476       *startsj_r = rstartsj;
5477       *bufa_ptr  = bufa;
5478     }
5479   }
5480   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5481   PetscFunctionReturn(0);
5482 }
5483 
5484 #undef __FUNCT__
5485 #define __FUNCT__ "MatGetCommunicationStructs"
5486 /*@C
5487   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5488 
5489   Not Collective
5490 
5491   Input Parameter:
5492 . A - The matrix in mpiaij format
5493 
5494   Output Parameters:
5495 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5496 . colmap - A map from global column index to local index into lvec
5497 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5498 
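  Example usage (a minimal sketch, assuming A is an assembled MPIAIJ matrix; the returned objects are owned by A and must not be destroyed by the caller):
.vb
    Vec        lvec;
    VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
.ve
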
5499   Level: developer
5500 
5501 @*/
5502 #if defined(PETSC_USE_CTABLE)
5503 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5504 #else
5505 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5506 #endif
5507 {
5508   Mat_MPIAIJ *a;
5509 
5510   PetscFunctionBegin;
5511   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5512   PetscValidPointer(lvec, 2);
5513   PetscValidPointer(colmap, 3);
5514   PetscValidPointer(multScatter, 4);
5515   a = (Mat_MPIAIJ*) A->data;
5516   if (lvec) *lvec = a->lvec;
5517   if (colmap) *colmap = a->colmap;
5518   if (multScatter) *multScatter = a->Mvctx;
5519   PetscFunctionReturn(0);
5520 }
5521 
5522 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5523 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5524 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5525 
5526 #undef __FUNCT__
5527 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5528 /*
5529     Computes C = (B'*A')' = A*B, since computing the MPIDense*MPIAIJ product A*B directly is untenable
5530 
5531                n                       p                          p
5532         (              )       (              )         (                  )
5533       m (      A       )  *  n (       B      )   =   m (         C        )
5534         (              )       (              )         (                  )
5535 
5536 */
5537 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5538 {
5539   PetscErrorCode ierr;
5540   Mat            At,Bt,Ct;
5541 
5542   PetscFunctionBegin;
5543   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5544   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5545   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5546   ierr = MatDestroy(&At);CHKERRQ(ierr);
5547   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5548   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5549   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5550   PetscFunctionReturn(0);
5551 }
5552 
5553 #undef __FUNCT__
5554 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5555 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5556 {
5557   PetscErrorCode ierr;
5558   PetscInt       m=A->rmap->n,n=B->cmap->n;
5559   Mat            Cmat;
5560 
5561   PetscFunctionBegin;
5562   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5563   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5564   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5565   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5566   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5567   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5568   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5569   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5570 
5571   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5572 
5573   *C = Cmat;
5574   PetscFunctionReturn(0);
5575 }
5576 
5577 /* ----------------------------------------------------------------*/
5578 #undef __FUNCT__
5579 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5580 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5581 {
5582   PetscErrorCode ierr;
5583 
5584   PetscFunctionBegin;
5585   if (scall == MAT_INITIAL_MATRIX) {
5586     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5587     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5588     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5589   }
5590   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5591   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5592   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5593   PetscFunctionReturn(0);
5594 }
5595 
5596 #if defined(PETSC_HAVE_MUMPS)
5597 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5598 #endif
5599 #if defined(PETSC_HAVE_PASTIX)
5600 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5601 #endif
5602 #if defined(PETSC_HAVE_SUPERLU_DIST)
5603 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5604 #endif
5605 #if defined(PETSC_HAVE_CLIQUE)
5606 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5607 #endif
5608 
5609 /*MC
5610    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5611 
5612    Options Database Keys:
5613 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5614 
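  Example (a minimal creation sketch; the global sizes M, N and the preallocation numbers are illustrative assumptions):
.vb
    Mat A;
    MatCreate(PETSC_COMM_WORLD,&A);
    MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
    MatSetType(A,MATMPIAIJ);
    MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
    /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve
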
5615   Level: beginner
5616 
5617 .seealso: MatCreateAIJ()
5618 M*/
5619 
5620 #undef __FUNCT__
5621 #define __FUNCT__ "MatCreate_MPIAIJ"
5622 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5623 {
5624   Mat_MPIAIJ     *b;
5625   PetscErrorCode ierr;
5626   PetscMPIInt    size;
5627 
5628   PetscFunctionBegin;
5629   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5630 
5631   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5632   B->data       = (void*)b;
5633   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5634   B->assembled  = PETSC_FALSE;
5635   B->insertmode = NOT_SET_VALUES;
5636   b->size       = size;
5637 
5638   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5639 
5640   /* build cache (stash) for off-processor entries */
5641   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5642 
5643   b->donotstash  = PETSC_FALSE;
5644   b->colmap      = 0;
5645   b->garray      = 0;
5646   b->roworiented = PETSC_TRUE;
5647 
5648   /* stuff used for matrix vector multiply */
5649   b->lvec  = NULL;
5650   b->Mvctx = NULL;
5651 
5652   /* stuff for MatGetRow() */
5653   b->rowindices   = 0;
5654   b->rowvalues    = 0;
5655   b->getrowactive = PETSC_FALSE;
5656 
5657   /* flexible pointer used in CUSP/CUSPARSE classes */
5658   b->spptr = NULL;
5659 
5660 #if defined(PETSC_HAVE_MUMPS)
5661   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5662 #endif
5663 #if defined(PETSC_HAVE_PASTIX)
5664   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5665 #endif
5666 #if defined(PETSC_HAVE_SUPERLU_DIST)
5667   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5668 #endif
5669 #if defined(PETSC_HAVE_CLIQUE)
5670   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5671 #endif
5672   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5673   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5674   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5675   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5676   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5677   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5678   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5679   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5680   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5681   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5682   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5683   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5684   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5685   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5686   PetscFunctionReturn(0);
5687 }
5688 
5689 #undef __FUNCT__
5690 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5691 /*@
5692      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5693          and "off-diagonal" part of the matrix in CSR format.
5694 
5695    Collective on MPI_Comm
5696 
5697    Input Parameters:
5698 +  comm - MPI communicator
5699 .  m - number of local rows (cannot be PETSC_DECIDE)
5700 .  n - number of local columns; this value should be the same as the local size used in creating the
5701        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5702        calculated if N is given). For square matrices n is almost always m.
5703 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5704 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5705 .   i - row indices for "diagonal" portion of matrix
5706 .   j - column indices
5707 .   a - matrix values
5708 .   oi - row indices for "off-diagonal" portion of matrix
5709 .   oj - column indices
5710 -   oa - matrix values
5711 
5712    Output Parameter:
5713 .   mat - the matrix
5714 
5715    Level: advanced
5716 
5717    Notes:
5718        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5719        must free the arrays once the matrix has been destroyed and not before.
5720 
5721        The i and j indices are 0 based
5722 
5723        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5724 
5725        This sets local rows and cannot be used to set off-processor values.
5726 
5727        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5728        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5729        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5730        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5731        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5732        communication if it is known that only local entries will be set.
5733 
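       Example usage (a minimal sketch; each process contributes the same 2x2 diagonal block and an empty off-diagonal block, so the global matrix is block diagonal):
.vb
    Mat         A;
    PetscInt    i[]  = {0,2,4},  j[]  = {0,1,0,1};   /* CSR of the local "diagonal" block */
    PetscScalar a[]  = {2.0,-1.0,-1.0,2.0};
    PetscInt    oi[] = {0,0,0},  oj[] = {0};         /* empty "off-diagonal" block */
    PetscScalar oa[] = {0.0};
    MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);
    /* the arrays must remain valid until after MatDestroy(&A) */
.ve
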
5734 .keywords: matrix, aij, compressed row, sparse, parallel
5735 
5736 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5737           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5738 @*/
5739 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5740 {
5741   PetscErrorCode ierr;
5742   Mat_MPIAIJ     *maij;
5743 
5744   PetscFunctionBegin;
5745   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5746   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5747   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5748   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5749   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5750   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5751   maij = (Mat_MPIAIJ*) (*mat)->data;
5752 
5753   (*mat)->preallocated = PETSC_TRUE;
5754 
5755   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5756   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5757 
5758   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5759   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5760 
5761   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5762   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5763   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5764   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5765 
5766   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5767   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5768   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5769   PetscFunctionReturn(0);
5770 }
5771 
5772 /*
5773     Special version for direct calls from Fortran
5774 */
5775 #include <petsc-private/fortranimpl.h>
5776 
5777 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5778 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5779 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5780 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5781 #endif
5782 
5783 /* Change these macros so they can be used in a void function */
5784 #undef CHKERRQ
5785 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5786 #undef SETERRQ2
5787 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5788 #undef SETERRQ3
5789 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5790 #undef SETERRQ
5791 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5792 
5793 #undef __FUNCT__
5794 #define __FUNCT__ "matsetvaluesmpiaij_"
5795 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5796 {
5797   Mat            mat  = *mmat;
5798   PetscInt       m    = *mm, n = *mn;
5799   InsertMode     addv = *maddv;
5800   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5801   PetscScalar    value;
5802   PetscErrorCode ierr;
5803 
5804   MatCheckPreallocated(mat,1);
5805   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5806 
5807 #if defined(PETSC_USE_DEBUG)
5808   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5809 #endif
5810   {
5811     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5812     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5813     PetscBool roworiented = aij->roworiented;
5814 
5815     /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
5816     Mat        A                 = aij->A;
5817     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5818     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5819     MatScalar  *aa               = a->a;
5820     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5821     Mat        B                 = aij->B;
5822     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5823     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5824     MatScalar  *ba               = b->a;
5825 
5826     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5827     PetscInt  nonew = a->nonew;
5828     MatScalar *ap1,*ap2;
5829 
5830     PetscFunctionBegin;
5831     for (i=0; i<m; i++) {
5832       if (im[i] < 0) continue;
5833 #if defined(PETSC_USE_DEBUG)
5834       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5835 #endif
5836       if (im[i] >= rstart && im[i] < rend) {
5837         row      = im[i] - rstart;
5838         lastcol1 = -1;
5839         rp1      = aj + ai[row];
5840         ap1      = aa + ai[row];
5841         rmax1    = aimax[row];
5842         nrow1    = ailen[row];
5843         low1     = 0;
5844         high1    = nrow1;
5845         lastcol2 = -1;
5846         rp2      = bj + bi[row];
5847         ap2      = ba + bi[row];
5848         rmax2    = bimax[row];
5849         nrow2    = bilen[row];
5850         low2     = 0;
5851         high2    = nrow2;
5852 
5853         for (j=0; j<n; j++) {
5854           if (roworiented) value = v[i*n+j];
5855           else value = v[i+j*m];
5856           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5857           if (in[j] >= cstart && in[j] < cend) {
5858             col = in[j] - cstart;
5859             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5860           } else if (in[j] < 0) continue;
5861 #if defined(PETSC_USE_DEBUG)
5862           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5863 #endif
5864           else {
5865             if (mat->was_assembled) {
5866               if (!aij->colmap) {
5867                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5868               }
5869 #if defined(PETSC_USE_CTABLE)
5870               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5871               col--;
5872 #else
5873               col = aij->colmap[in[j]] - 1;
5874 #endif
5875               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5876                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5877                 col  =  in[j];
5878                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5879                 B     = aij->B;
5880                 b     = (Mat_SeqAIJ*)B->data;
5881                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; /* ba must be refreshed before ap2 is set: MatDisAssemble_MPIAIJ() replaced B */
5882                 rp2   = bj + bi[row];
5883                 ap2   = ba + bi[row];
5884                 rmax2 = bimax[row];
5885                 nrow2 = bilen[row];
5886                 low2  = 0;
5887                 high2 = nrow2;
5888                 bm    = aij->B->rmap->n;
5890               }
5891             } else col = in[j];
5892             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5893           }
5894         }
5895       } else if (!aij->donotstash) {
5896         if (roworiented) {
5897           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5898         } else {
5899           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5900         }
5901       }
5902     }
5903   }
5904   PetscFunctionReturnVoid();
5905 }
5906 
5907